From d17fea9f4160084012c9596029dfeba3220a5ff3 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sat, 1 Dec 2018 15:41:24 +0000 Subject: [PATCH 1/6] Vendor import of llvm release_70 branch r348011: https://llvm.org/svn/llvm-project/llvm/branches/release_70@348011 --- include/llvm/MC/MCAsmBackend.h | 5 + .../llvm/Support/GenericDomTreeConstruction.h | 14 + include/llvm/Transforms/Utils/SSAUpdater.h | 4 + .../llvm/Transforms/Utils/SSAUpdaterImpl.h | 7 +- lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 5 +- lib/MC/MCExpr.cpp | 5 + .../Mips/MCTargetDesc/MipsAsmBackend.cpp | 9 + lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 2 + .../Mips/MCTargetDesc/MipsELFStreamer.cpp | 17 + .../Mips/MCTargetDesc/MipsELFStreamer.h | 7 + lib/Target/Mips/MicroMips32r6InstrInfo.td | 2 +- lib/Target/Mips/Mips64InstrInfo.td | 5 +- lib/Target/Mips/Mips64r6InstrInfo.td | 3 + lib/Target/Mips/MipsFastISel.cpp | 5 + lib/Target/Mips/MipsInstrFPU.td | 8 +- lib/Target/Mips/MipsSEFrameLowering.cpp | 17 +- lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 12 + lib/Target/Mips/MipsSEInstrInfo.cpp | 11 +- lib/Target/PowerPC/P9InstrResources.td | 1 + lib/Target/PowerPC/PPCISelLowering.cpp | 11 - lib/Target/PowerPC/PPCInstrVSX.td | 91 ++++- lib/Transforms/Utils/LCSSA.cpp | 16 + lib/Transforms/Utils/SSAUpdater.cpp | 5 + test/CodeGen/Mips/Fast-ISel/bricmpi1.ll | 189 ++++++++++ ...ldpairf64-extractelementf64-implicit-sp.ll | 32 ++ test/CodeGen/Mips/longbranch.ll | 7 +- test/CodeGen/Mips/micromips-b-range.ll | 98 +++++ .../Mips/micromips-gcc-except-table.ll | 37 ++ test/CodeGen/Mips/micromips-mtc-mfc.ll | 68 ++++ ...nk-wrap-buildpairf64-extractelementf64.mir | 150 ++++++++ test/CodeGen/Mips/tls.ll | 8 +- test/CodeGen/PowerPC/VSX-XForm-Scalars.ll | 61 ++-- test/CodeGen/PowerPC/build-vector-tests.ll | 44 ++- test/CodeGen/PowerPC/load-v4i8-improved.ll | 22 +- .../PowerPC/power9-moves-and-splats.ll | 246 +++++++++---- test/CodeGen/PowerPC/pr38087.ll | 5 +- test/CodeGen/PowerPC/qpx-load-splat.ll | 62 ++-- 
test/CodeGen/PowerPC/scalar_vector_test_1.ll | 292 +++++++++++++++ test/CodeGen/PowerPC/scalar_vector_test_2.ll | 118 ++++++ test/CodeGen/PowerPC/scalar_vector_test_3.ll | 265 ++++++++++++++ test/CodeGen/PowerPC/scalar_vector_test_4.ll | 341 ++++++++++++++++++ test/CodeGen/PowerPC/swaps-le-6.ll | 89 +++-- test/CodeGen/PowerPC/vsx_insert_extract_le.ll | 123 +++++-- test/CodeGen/X86/mingw-comdats.ll | 33 +- test/DebugInfo/Mips/eh_frame.ll | 38 ++ .../LCSSA/rewrite-existing-dbg-values.ll | 69 ++++ tools/llvm-exegesis/lib/CMakeLists.txt | 8 +- 47 files changed, 2386 insertions(+), 281 deletions(-) create mode 100644 test/CodeGen/Mips/Fast-ISel/bricmpi1.ll create mode 100644 test/CodeGen/Mips/buildpairf64-extractelementf64-implicit-sp.ll create mode 100644 test/CodeGen/Mips/micromips-b-range.ll create mode 100644 test/CodeGen/Mips/micromips-gcc-except-table.ll create mode 100644 test/CodeGen/Mips/micromips-mtc-mfc.ll create mode 100644 test/CodeGen/Mips/shrink-wrap-buildpairf64-extractelementf64.mir create mode 100644 test/CodeGen/PowerPC/scalar_vector_test_1.ll create mode 100644 test/CodeGen/PowerPC/scalar_vector_test_2.ll create mode 100644 test/CodeGen/PowerPC/scalar_vector_test_3.ll create mode 100644 test/CodeGen/PowerPC/scalar_vector_test_4.ll create mode 100644 test/DebugInfo/Mips/eh_frame.ll create mode 100644 test/Transforms/LCSSA/rewrite-existing-dbg-values.ll diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h index 030d3c05aa5a..07835c21fced 100644 --- a/include/llvm/MC/MCAsmBackend.h +++ b/include/llvm/MC/MCAsmBackend.h @@ -165,6 +165,11 @@ class MCAsmBackend { return 0; } + /// Check whether a given symbol has been flagged with MICROMIPS flag. + virtual bool isMicroMips(const MCSymbol *Sym) const { + return false; + } + /// Handles all target related code padding when starting to write a new /// basic block to an object file. 
/// diff --git a/include/llvm/Support/GenericDomTreeConstruction.h b/include/llvm/Support/GenericDomTreeConstruction.h index 103ff8ca476a..977f209f92b3 100644 --- a/include/llvm/Support/GenericDomTreeConstruction.h +++ b/include/llvm/Support/GenericDomTreeConstruction.h @@ -1186,6 +1186,20 @@ struct SemiNCAInfo { << '\t' << U << "\n"); LLVM_DEBUG(dbgs() << "\n"); + // Recalculate the DominatorTree when the number of updates + // exceeds a threshold, which usually makes direct updating slower than + // recalculation. We select this threshold proportional to the + // size of the DominatorTree. The constant is selected + // by choosing the one with an acceptable performance on some real-world + // inputs. + + // Make unittests of the incremental algorithm work + if (DT.DomTreeNodes.size() <= 100) { + if (NumLegalized > DT.DomTreeNodes.size()) + CalculateFromScratch(DT, &BUI); + } else if (NumLegalized > DT.DomTreeNodes.size() / 40) + CalculateFromScratch(DT, &BUI); + // If the DominatorTree was recalculated at some point, stop the batch // updates. Full recalculations ignore batch updates and look at the actual // CFG. diff --git a/include/llvm/Transforms/Utils/SSAUpdater.h b/include/llvm/Transforms/Utils/SSAUpdater.h index 4a7911662990..d02607acbbb5 100644 --- a/include/llvm/Transforms/Utils/SSAUpdater.h +++ b/include/llvm/Transforms/Utils/SSAUpdater.h @@ -76,6 +76,10 @@ class SSAUpdater { /// block. bool HasValueForBlock(BasicBlock *BB) const; + /// Return the value for the specified block if the SSAUpdater has one, + /// otherwise return nullptr. + Value *FindValueForBlock(BasicBlock *BB) const; + /// Construct SSA form, materializing a value that is live at the end /// of the specified block. 
Value *GetValueAtEndOfBlock(BasicBlock *BB); diff --git a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h index b7649ba88334..cab0f3e71575 100644 --- a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h +++ b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h @@ -357,10 +357,9 @@ class SSAUpdaterImpl { BBInfo *Info = *I; if (Info->DefBB != Info) { - // Record the available value at join nodes to speed up subsequent - // uses of this SSAUpdater for the same value. - if (Info->NumPreds > 1) - (*AvailableVals)[Info->BB] = Info->DefBB->AvailableVal; + // Record the available value to speed up subsequent uses of this + // SSAUpdater for the same value. + (*AvailableVals)[Info->BB] = Info->DefBB->AvailableVal; continue; } diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index f6b91a2f0231..16140f0b12be 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -1156,10 +1156,11 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( MCSymbol *Sym = TM.getSymbol(ComdatGV); StringRef COMDATSymName = Sym->getName(); - // Append "$symbol" to the section name when targetting mingw. The ld.bfd + // Append "$symbol" to the section name *before* IR-level mangling is + // applied when targetting mingw. This is what GCC does, and the ld.bfd // COFF linker will not properly handle comdats otherwise. 
if (getTargetTriple().isWindowsGNUEnvironment()) - raw_svector_ostream(Name) << '$' << COMDATSymName; + raw_svector_ostream(Name) << '$' << ComdatGV->getName(); return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName, Selection, UniqueID); diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index a4c99a0c1c15..ef6f0041e0c8 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -524,6 +524,11 @@ static void AttemptToFoldSymbolOffsetDifference( if (Asm->isThumbFunc(&SA)) Addend |= 1; + // If symbol is labeled as micromips, we set low-bit to ensure + // correct offset in .gcc_except_table + if (Asm->getBackend().isMicroMips(&SA)) + Addend |= 1; + // Clear the symbol expr pointers to indicate we have folded these // operands. A = B = nullptr; diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 4397c971d080..3b1b94acb149 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -25,6 +25,7 @@ #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" @@ -568,6 +569,14 @@ bool MipsAsmBackend::shouldForceRelocation(const MCAssembler &Asm, } } +bool MipsAsmBackend::isMicroMips(const MCSymbol *Sym) const { + if (const auto *ElfSym = dyn_cast<const MCSymbolELF>(Sym)) { + if (ElfSym->getOther() & ELF::STO_MIPS_MICROMIPS) + return true; + } + return false; +} + MCAsmBackend *llvm::createMipsAsmBackend(const Target &T, const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index 3d5e16fcf9b4..30359132e92b 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -25,6 +25,7 @@ class MCAssembler; struct 
MCFixupKindInfo; class MCObjectWriter; class MCRegisterInfo; +class MCSymbolELF; class Target; class MipsAsmBackend : public MCAsmBackend { @@ -90,6 +91,7 @@ class MipsAsmBackend : public MCAsmBackend { bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target) override; + bool isMicroMips(const MCSymbol *Sym) const override; }; // class MipsAsmBackend } // namespace diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index 7b9a02503ce2..21b01e850967 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -15,6 +15,7 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSymbolELF.h" @@ -53,6 +54,22 @@ void MipsELFStreamer::EmitInstruction(const MCInst &Inst, createPendingLabelRelocs(); } +void MipsELFStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) { + Frame.Begin = getContext().createTempSymbol(); + MCELFStreamer::EmitLabel(Frame.Begin); +} + +MCSymbol *MipsELFStreamer::EmitCFILabel() { + MCSymbol *Label = getContext().createTempSymbol("cfi", true); + MCELFStreamer::EmitLabel(Label); + return Label; +} + +void MipsELFStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) { + Frame.End = getContext().createTempSymbol(); + MCELFStreamer::EmitLabel(Frame.End); +} + void MipsELFStreamer::createPendingLabelRelocs() { MipsTargetELFStreamer *ELFTargetStreamer = static_cast<MipsTargetELFStreamer *>(getTargetStreamer()); diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h index d141f5d77c61..56a0ff96c7bd 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h @@ -26,6 +26,7 @@ class MCAsmBackend; class MCCodeEmitter; class MCContext; class MCSubtargetInfo; 
+struct MCDwarfFrameInfo; class MipsELFStreamer : public MCELFStreamer { SmallVector, 8> MipsOptionRecords; @@ -60,6 +61,12 @@ class MipsELFStreamer : public MCELFStreamer { void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override; void EmitIntValue(uint64_t Value, unsigned Size) override; + // Overriding these functions allows us to avoid recording of these labels + // in EmitLabel and later marking them as microMIPS. + void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override; + void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override; + MCSymbol *EmitCFILabel() override; + /// Emits all the option records stored up until the point it's called. void EmitMipsOptionRecords(); diff --git a/lib/Target/Mips/MicroMips32r6InstrInfo.td b/lib/Target/Mips/MicroMips32r6InstrInfo.td index f795112ae2b7..6b0aa7756eab 100644 --- a/lib/Target/Mips/MicroMips32r6InstrInfo.td +++ b/lib/Target/Mips/MicroMips32r6InstrInfo.td @@ -1733,7 +1733,7 @@ defm S_MMR6 : Cmp_Pats, ISA_MICROMIPS32R6; defm D_MMR6 : Cmp_Pats, ISA_MICROMIPS32R6; def : MipsPat<(f32 fpimm0), (MTC1_MMR6 ZERO)>, ISA_MICROMIPS32R6; -def : MipsPat<(f32 fpimm0neg), (FNEG_S_MMR6 (MTC1 ZERO))>, ISA_MICROMIPS32R6; +def : MipsPat<(f32 fpimm0neg), (FNEG_S_MMR6 (MTC1_MMR6 ZERO))>, ISA_MICROMIPS32R6; def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src), (TRUNC_W_D_MMR6 FGR64Opnd:$src)>, ISA_MICROMIPS32R6; diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 878ec29b188d..b5317bec70c4 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -838,7 +838,7 @@ def : MipsPat<(i64 (sext (i32 (sub GPR32:$src, GPR32:$src2)))), (SUBu GPR32:$src, GPR32:$src2), sub_32)>; def : MipsPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (MUL GPR32:$src, GPR32:$src2), sub_32)>; + (MUL GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS3_NOT_32R6_64R6; def : MipsPat<(i64 (sext (i32 (MipsMFHI ACC64:$src)))), (INSERT_SUBREG (i64 
(IMPLICIT_DEF)), (PseudoMFHI ACC64:$src), sub_32)>; @@ -1139,3 +1139,6 @@ def SLTUImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rs), "sltu\t$rs, $rt, $imm">, GPR_64; def : MipsInstAlias<"sltu\t$rs, $imm", (SLTUImm64 GPR64Opnd:$rs, GPR64Opnd:$rs, imm64:$imm)>, GPR_64; + +def : MipsInstAlias<"rdhwr $rt, $rs", + (RDHWR64 GPR64Opnd:$rt, HWRegsOpnd:$rs, 0), 1>, GPR_64; diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td index 9df802cc30b9..ac223bc77256 100644 --- a/lib/Target/Mips/Mips64r6InstrInfo.td +++ b/lib/Target/Mips/Mips64r6InstrInfo.td @@ -301,6 +301,9 @@ def : MipsPat<(select (i32 (seteq i32:$cond, immz)), immz, i64:$f), // Patterns used for matching away redundant sign extensions. // MIPS32 arithmetic instructions sign extend their result implicitly. +def : MipsPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (MUL_R6 GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS64R6; def : MipsPat<(i64 (sext (i32 (sdiv GPR32:$src, GPR32:$src2)))), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (DIV GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS64R6; diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index 19b30a44e86a..22ade31a72cd 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -953,6 +953,11 @@ bool MipsFastISel::selectBranch(const Instruction *I) { MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; // For now, just try the simplest case where it's fed by a compare. 
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { + MVT CIMVT = + TLI.getValueType(DL, CI->getOperand(0)->getType(), true).getSimpleVT(); + if (CIMVT == MVT::i1) + return false; + unsigned CondReg = getRegForValue(CI); BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ)) .addReg(CondReg) diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index dd30e20a743c..e986942ad8fa 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -485,14 +485,14 @@ let AdditionalPredicates = [NotInMicroMips] in { def CTC1 : MMRel, MTC1_FT<"ctc1", CCROpnd, GPR32Opnd, II_CTC1>, MFC1_FM<6>, ISA_MIPS1; - def MFC1 : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, II_MFC1, - bitconvert>, MFC1_FM<0>, ISA_MIPS1; + def MFC1 : MMRel, StdMMR6Rel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, II_MFC1, + bitconvert>, MFC1_FM<0>, ISA_MIPS1; def MFC1_D64 : MFC1_FT<"mfc1", GPR32Opnd, FGR64Opnd, II_MFC1>, MFC1_FM<0>, ISA_MIPS1, FGR_64 { let DecoderNamespace = "MipsFP64"; } - def MTC1 : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1, - bitconvert>, MFC1_FM<4>, ISA_MIPS1; + def MTC1 : MMRel, StdMMR6Rel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1, + bitconvert>, MFC1_FM<4>, ISA_MIPS1; def MTC1_D64 : MTC1_FT<"mtc1", FGR64Opnd, GPR32Opnd, II_MTC1>, MFC1_FM<4>, ISA_MIPS1, FGR_64 { let DecoderNamespace = "MipsFP64"; diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 687c9f676b34..ef1b3c09bdc4 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -299,8 +299,12 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB, // register). Unfortunately, we have to make this decision before register // allocation so for now we use a spill/reload sequence for all // double-precision values in regardless of being an odd/even register. 
- if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) || - (FP64 && !Subtarget.useOddSPReg())) { + // + // For the cases that should be covered here MipsSEISelDAGToDAG adds $sp as + // implicit operand, so other passes (like ShrinkWrapping) are aware that + // stack is used. + if (I->getNumOperands() == 4 && I->getOperand(3).isReg() + && I->getOperand(3).getReg() == Mips::SP) { unsigned DstReg = I->getOperand(0).getReg(); unsigned LoReg = I->getOperand(1).getReg(); unsigned HiReg = I->getOperand(2).getReg(); @@ -360,9 +364,12 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB, // register). Unfortunately, we have to make this decision before register // allocation so for now we use a spill/reload sequence for all // double-precision values in regardless of being an odd/even register. - - if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) || - (FP64 && !Subtarget.useOddSPReg())) { + // + // For the cases that should be covered here MipsSEISelDAGToDAG adds $sp as + // implicit operand, so other passes (like ShrinkWrapping) are aware that + // stack is used. 
+ if (I->getNumOperands() == 4 && I->getOperand(3).isReg() + && I->getOperand(3).getReg() == Mips::SP) { unsigned DstReg = I->getOperand(0).getReg(); unsigned SrcReg = Op1.getReg(); unsigned N = Op2.getImm(); diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 599c1e913acf..cf2899dd375e 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -238,6 +238,18 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { case Mips::WRDSP: addDSPCtrlRegOperands(true, MI, MF); break; + case Mips::BuildPairF64_64: + case Mips::ExtractElementF64_64: + if (!Subtarget->useOddSPReg()) { + MI.addOperand(MachineOperand::CreateReg(Mips::SP, false, true)); + break; + } + // fallthrough + case Mips::BuildPairF64: + case Mips::ExtractElementF64: + if (Subtarget->isABI_FPXX() && !Subtarget->hasMTHC1()) + MI.addOperand(MachineOperand::CreateReg(Mips::SP, false, true)); + break; default: replaceUsesWithZeroReg(MRI, MI); } diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index 7ffe4aff474d..e8589fc53492 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -25,9 +25,14 @@ using namespace llvm; +static unsigned getUnconditionalBranch(const MipsSubtarget &STI) { + if (STI.inMicroMipsMode()) + return STI.isPositionIndependent() ? Mips::B_MM : Mips::J_MM; + return STI.isPositionIndependent() ? Mips::B : Mips::J; +} + MipsSEInstrInfo::MipsSEInstrInfo(const MipsSubtarget &STI) - : MipsInstrInfo(STI, STI.isPositionIndependent() ? 
Mips::B : Mips::J), - RI() {} + : MipsInstrInfo(STI, getUnconditionalBranch(STI)), RI() {} const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const { return RI; @@ -643,7 +648,7 @@ unsigned MipsSEInstrInfo::getAnalyzableBrOpc(unsigned Opc) const { Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 || Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B || Opc == Mips::J || - Opc == Mips::B_MM || Opc == Mips::BEQZC_MM || + Opc == Mips::J_MM || Opc == Mips::B_MM || Opc == Mips::BEQZC_MM || Opc == Mips::BNEZC_MM || Opc == Mips::BEQC || Opc == Mips::BNEC || Opc == Mips::BLTC || Opc == Mips::BGEC || Opc == Mips::BLTUC || Opc == Mips::BGEUC || Opc == Mips::BGTZC || Opc == Mips::BLEZC || diff --git a/lib/Target/PowerPC/P9InstrResources.td b/lib/Target/PowerPC/P9InstrResources.td index 34df8452fe16..c6cbb9037ede 100644 --- a/lib/Target/PowerPC/P9InstrResources.td +++ b/lib/Target/PowerPC/P9InstrResources.td @@ -592,6 +592,7 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C], XXPERM, XXPERMR, XXSLDWI, + XXSLDWIs, XXSPLTIB, XXSPLTW, XXSPLTWs, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 331dbcbbe060..b5bdf47ce37a 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8454,17 +8454,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); - // If the source for the shuffle is a scalar_to_vector that came from a - // 32-bit load, it will have used LXVWSX so we don't need to splat again. 
- if (Subtarget.hasP9Vector() && - ((isLittleEndian && SplatIdx == 3) || - (!isLittleEndian && SplatIdx == 0))) { - SDValue Src = V1.getOperand(0); - if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR && - Src.getOperand(0).getOpcode() == ISD::LOAD && - Src.getOperand(0).hasOneUse()) - return V1; - } SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv, DAG.getConstant(SplatIdx, dl, MVT::i32)); diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 183512acaf9e..781a3277441a 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -877,6 +877,12 @@ let Uses = [RM] in { "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, [(set v4i32:$XT, (PPCvecshl v4i32:$XA, v4i32:$XB, imm32SExt16:$SHW))]>; + + let isCodeGenOnly = 1 in + def XXSLDWIs : XX3Form_2s<60, 2, + (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$SHW), + "xxsldwi $XT, $XA, $XA, $SHW", IIC_VecPerm, []>; + def XXSPLTW : XX2Form_2<60, 164, (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM), "xxspltw $XT, $XB, $UIM", IIC_VecPerm, @@ -886,6 +892,7 @@ let Uses = [RM] in { def XXSPLTWs : XX2Form_2<60, 164, (outs vsrc:$XT), (ins vfrc:$XB, u2imm:$UIM), "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>; + } // hasSideEffects } // UseVSXReg = 1 @@ -1466,8 +1473,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))), (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; } - def : Pat<(v4i32 (scalar_to_vector ScalarLoads.Li32)), - (v4i32 (XXSPLTWs (LIWAX xoaddr:$src), 1))>; // Instructions for converting float to i64 feeding a store. 
let Predicates = [NoP9Vector] in { @@ -3050,13 +3055,47 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (STXVX $rS, xoaddr:$dst)>; def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; - def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), - (v4i32 (LXVWSX xoaddr:$src))>; - def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), - (v4f32 (LXVWSX xoaddr:$src))>; - def : Pat<(v4f32 (scalar_to_vector - (f32 (fpround (f64 (extloadf32 xoaddr:$src)))))), - (v4f32 (LXVWSX xoaddr:$src))>; + + let AddedComplexity = 400 in { + // LIWAX - This instruction is used for sign extending i32 -> i64. + // LIWZX - This instruction will be emitted for i32, f32, and when + // zero-extending i32 to i64 (zext i32 -> i64). + let Predicates = [IsLittleEndian] in { + + def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC), 2))>; + + def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>; + + def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), + (v4i32 (XXPERMDIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>; + + def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), + (v4f32 (XXPERMDIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>; + } + + let Predicates = [IsBigEndian] in { + def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>; + + def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>; + + def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), + (v4i32 (XXSLDWIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>; + + def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), + (v4f32 (XXSLDWIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>; + } + + } // Build vectors 
from i8 loads def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)), @@ -3218,6 +3257,39 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))), (f32 (DFLOADf32 ixaddr:$src))>; + + let AddedComplexity = 400 in { + // The following pseudoinstructions are used to ensure the utilization + // of all 64 VSX registers. + let Predicates = [IsLittleEndian, HasP9Vector] in { + def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>; + def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>; + + def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))), + (v2f64 (XXPERMDIs + (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>; + def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))), + (v2f64 (XXPERMDIs + (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>; + } + + let Predicates = [IsBigEndian, HasP9Vector] in { + def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>; + def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>; + + def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))), + (v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>; + def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))), + (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>; + } + } + let Predicates = [IsBigEndian, HasP9Vector] in { // (Un)Signed DWord vector extract -> QP @@ -3932,3 +4004,4 @@ let AddedComplexity = 400 in { (v4i32 (VEXTSH2W $A))>; } } + diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index a1f8e7484bcf..53d444b309d5 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -41,6 +41,7 @@ #include "llvm/IR/Dominators.h" #include 
"llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PredIteratorCache.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils.h" @@ -201,6 +202,21 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl &Worklist, SSAUpdate.RewriteUse(*UseToRewrite); } + SmallVector DbgValues; + llvm::findDbgValues(DbgValues, I); + + // Update pre-existing debug value uses that reside outside the loop. + auto &Ctx = I->getContext(); + for (auto DVI : DbgValues) { + BasicBlock *UserBB = DVI->getParent(); + if (InstBB == UserBB || L->contains(UserBB)) + continue; + // We currently only handle debug values residing in blocks where we have + // inserted a PHI instruction. + if (Value *V = SSAUpdate.FindValueForBlock(UserBB)) + DVI->setOperand(0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(V))); + } + // SSAUpdater might have inserted phi-nodes inside other loops. We'll need // to post-process them to keep LCSSA form. for (PHINode *InsertedPN : InsertedPHIs) { diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 4a1fd8d571aa..9e5fb0e7172d 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -64,6 +64,11 @@ bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const { return getAvailableVals(AV).count(BB); } +Value *SSAUpdater::FindValueForBlock(BasicBlock *BB) const { + AvailableValsTy::iterator AVI = getAvailableVals(AV).find(BB); + return (AVI != getAvailableVals(AV).end()) ? 
AVI->second : nullptr; +} + void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { assert(ProtoType && "Need to initialize SSAUpdater"); assert(ProtoType == V->getType() && diff --git a/test/CodeGen/Mips/Fast-ISel/bricmpi1.ll b/test/CodeGen/Mips/Fast-ISel/bricmpi1.ll new file mode 100644 index 000000000000..47b3c92203d1 --- /dev/null +++ b/test/CodeGen/Mips/Fast-ISel/bricmpi1.ll @@ -0,0 +1,189 @@ +; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel=true -mcpu=mips32r2 \ +; RUN: < %s -verify-machineinstrs | FileCheck %s + +define void @testeq(i32, i32) { +; CHECK-LABEL: testeq: +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: beq $[[REG0]], $[[REG1]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp eq i1 %3, %4 + br i1 %5, label %end, label %trap +trap: + call void @llvm.trap() + br label %end +end: + ret void +} + + +define void @testne(i32, i32) { +; CHECK-LABEL: testne: +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: bne $[[REG0]], $[[REG1]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp ne i1 %3, %4 + br i1 %5, label %end, label %trap +trap: + call void @llvm.trap() + br label %end +end: + ret void +} + + +define void @testugt(i32, i32) { +; CHECK-LABEL: testugt: +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: sltu $[[REG2:[0-9]+]], $[[REG1]], $[[REG0]] +; CHECK: bnez $[[REG2]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp ugt i1 %3, %4 + br i1 %5, label %end, label %trap +trap: + call void @llvm.trap() + br label %end +end: + ret void +} + + +define void @testuge(i32, i32) { +; CHECK-LABEL: testuge: +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: sltu $[[REG2:[0-9]+]], $[[REG0]], $[[REG1]] +; CHECK: beqz $[[REG2]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp uge i1 %3, %4 + br i1 %5, label %end, label %trap +trap: + 
call void @llvm.trap() + br label %end +end: + ret void +} + + +define void @testult(i32, i32) { +; CHECK-LABEL: testult: +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: sltu $[[REG2:[0-9]+]], $[[REG0]], $[[REG1]] +; CHECK: bnez $[[REG2]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp ult i1 %3, %4 + br i1 %5, label %end, label %trap +trap: + call void @llvm.trap() + br label %end +end: + ret void +} + + +define void @testule(i32, i32) { +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: sltu $[[REG2:[0-9]+]], $[[REG1]], $[[REG0]] +; CHECK: beqz $[[REG2]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp ule i1 %3, %4 + br i1 %5, label %end, label %trap +trap: + call void @llvm.trap() + br label %end +end: + ret void +} + + +define void @testsgt(i32, i32) { +; CHECK-LABEL: testsgt: +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: negu $[[REG0]], $[[REG0]] +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: negu $[[REG1]], $[[REG1]] +; CHECK: slt $[[REG2:[0-9]+]], $[[REG1]], $[[REG0]] +; CHECK: bnez $[[REG2]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp sgt i1 %3, %4 + br i1 %5, label %end, label %trap +trap: + call void @llvm.trap() + br label %end +end: + ret void +} + + +define void @testsge(i32, i32) { +; CHECK-LABEL: testsge: +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: negu $[[REG0]], $[[REG0]] +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: negu $[[REG1]], $[[REG1]] +; CHECK: slt $[[REG2:[0-9]+]], $[[REG0]], $[[REG1]] +; CHECK: beqz $[[REG2]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp sge i1 %3, %4 + br i1 %5, label %end, label %trap +trap: + call void @llvm.trap() + br label %end +end: + ret void +} + + +define void @testslt(i32, i32) { +; CHECK-LABEL: testslt: +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: negu $[[REG0]], $[[REG0]] +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: negu $[[REG1]], 
$[[REG1]] +; CHECK: slt $[[REG2:[0-9]+]], $[[REG0]], $[[REG1]] +; CHECK: bnez $[[REG2]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp slt i1 %3, %4 + br i1 %5, label %end, label %trap +trap: + call void @llvm.trap() + br label %end +end: + ret void +} + + +define void @testsle(i32, i32) { +; CHECK-LABEL: testsle: +; CHECK: andi $[[REG0:[0-9]+]], $4, 1 +; CHECK: negu $[[REG0]], $[[REG0]] +; CHECK: andi $[[REG1:[0-9]+]], $5, 1 +; CHECK: negu $[[REG1]], $[[REG1]] +; CHECK: slt $[[REG2:[0-9]+]], $[[REG1]], $[[REG0]] +; CHECK: beqz $[[REG2]], + %3 = trunc i32 %0 to i1 + %4 = trunc i32 %1 to i1 + %5 = icmp sle i1 %3, %4 + br i1 %5, label %end, label %trap +trap: + call void @llvm.trap() + br label %end +end: + ret void +} + + +declare void @llvm.trap() diff --git a/test/CodeGen/Mips/buildpairf64-extractelementf64-implicit-sp.ll b/test/CodeGen/Mips/buildpairf64-extractelementf64-implicit-sp.ll new file mode 100644 index 000000000000..7847fc89371b --- /dev/null +++ b/test/CodeGen/Mips/buildpairf64-extractelementf64-implicit-sp.ll @@ -0,0 +1,32 @@ +; RUN: llc -o - %s -mtriple=mips-unknown-linux-gnu \ +; RUN: -mcpu=mips32 -mattr=+fpxx \ +; RUN: -stop-after=expand-isel-pseudos | \ +; RUN: FileCheck %s -check-prefix=FPXX-IMPLICIT-SP + +; RUN: llc -o - %s -mtriple=mips-unknown-linux-gnu \ +; RUN: -mcpu=mips32r6 -mattr=+fp64,+nooddspreg \ +; RUN: -stop-after=expand-isel-pseudos | \ +; RUN: FileCheck %s -check-prefix=FP64-IMPLICIT-SP + +; RUN: llc -o - %s -mtriple=mips-unknown-linux-gnu \ +; RUN: -mcpu=mips32r2 -mattr=+fpxx \ +; RUN: -stop-after=expand-isel-pseudos | \ +; RUN: FileCheck %s -check-prefix=NO-IMPLICIT-SP + +define double @foo2(i32 signext %v1, double %d1) { +entry: +; FPXX-IMPLICIT-SP: BuildPairF64 %{{[0-9]+}}, %{{[0-9]+}}, implicit $sp +; FPXX-IMPLICIT-SP: ExtractElementF64 killed %{{[0-9]+}}, 1, implicit $sp +; FP64-IMPLICIT-SP: BuildPairF64_64 %{{[0-9]+}}, %{{[0-9]+}}, implicit $sp +; FP64-IMPLICIT-SP: ExtractElementF64_64 killed %{{[0-9]+}}, 
1, implicit $sp +; NO-IMPLICIT-SP: BuildPairF64 %{{[0-9]+}}, %{{[0-9]+}} +; NO-IMPLICIT-SP-NOT: BuildPairF64 %{{[0-9]+}}, %{{[0-9]+}}, implicit $sp +; NO-IMPLICIT-SP: ExtractElementF64 killed %{{[0-9]+}}, 1 +; NO-IMPLICIT-SP-NOT: ExtractElementF64 killed %{{[0-9]+}}, 1, implicit $sp + %conv = fptrunc double %d1 to float + %0 = tail call float @llvm.copysign.f32(float 1.000000e+00, float %conv) + %conv1 = fpext float %0 to double + ret double %conv1 +} + +declare float @llvm.copysign.f32(float, float) diff --git a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll index 74cd1fe3aa4f..43eaa50daf05 100644 --- a/test/CodeGen/Mips/longbranch.ll +++ b/test/CodeGen/Mips/longbranch.ll @@ -231,16 +231,13 @@ define void @test1(i32 signext %s) { ; MICROMIPSSTATIC: # %bb.0: # %entry ; MICROMIPSSTATIC-NEXT: bnezc $4, $BB0_2 ; MICROMIPSSTATIC-NEXT: # %bb.1: # %entry -; MICROMIPSSTATIC-NEXT: j $BB0_4 -; MICROMIPSSTATIC-NEXT: nop -; MICROMIPSSTATIC-NEXT: $BB0_2: # %entry ; MICROMIPSSTATIC-NEXT: j $BB0_3 ; MICROMIPSSTATIC-NEXT: nop -; MICROMIPSSTATIC-NEXT: $BB0_3: # %then +; MICROMIPSSTATIC-NEXT: $BB0_2: # %then ; MICROMIPSSTATIC-NEXT: lui $1, %hi(x) ; MICROMIPSSTATIC-NEXT: li16 $2, 1 ; MICROMIPSSTATIC-NEXT: sw $2, %lo(x)($1) -; MICROMIPSSTATIC-NEXT: $BB0_4: # %end +; MICROMIPSSTATIC-NEXT: $BB0_3: # %end ; MICROMIPSSTATIC-NEXT: jrc $ra ; ; MICROMIPSR6STATIC-LABEL: test1: diff --git a/test/CodeGen/Mips/micromips-b-range.ll b/test/CodeGen/Mips/micromips-b-range.ll new file mode 100644 index 000000000000..f761d1c31d32 --- /dev/null +++ b/test/CodeGen/Mips/micromips-b-range.ll @@ -0,0 +1,98 @@ +; RUN: llc -march=mips -relocation-model=pic -mattr=+micromips \ +; RUN: -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s + +; CHECK-LABEL: foo: +; CHECK-NEXT: 0: 41 a2 00 00 lui $2, 0 +; CHECK-NEXT: 4: 30 42 00 00 addiu $2, $2, 0 +; CHECK-NEXT: 8: 03 22 11 50 addu $2, $2, $25 +; CHECK-NEXT: c: fc 42 00 00 lw $2, 0($2) +; CHECK-NEXT: 10: 69 20 lw16 $2, 0($2) +; CHECK-NEXT: 
12: 40 c2 00 14 bgtz $2, 44 +; CHECK-NEXT: 16: 00 00 00 00 nop +; CHECK-NEXT: 1a: 33 bd ff f8 addiu $sp, $sp, -8 +; CHECK-NEXT: 1e: fb fd 00 00 sw $ra, 0($sp) +; CHECK-NEXT: 22: 41 a1 00 01 lui $1, 1 +; CHECK-NEXT: 26: 40 60 00 02 bal 8 +; CHECK-NEXT: 2a: 30 21 04 68 addiu $1, $1, 1128 +; CHECK-NEXT: 2e: 00 3f 09 50 addu $1, $ra, $1 +; CHECK-NEXT: 32: ff fd 00 00 lw $ra, 0($sp) +; CHECK-NEXT: 36: 00 01 0f 3c jr $1 +; CHECK-NEXT: 3a: 33 bd 00 08 addiu $sp, $sp, 8 +; CHECK-NEXT: 3e: 94 00 00 02 b 8 +; CHECK-NEXT: 42: 00 00 00 00 nop +; CHECK-NEXT: 46: 30 20 4e 1f addiu $1, $zero, 19999 +; CHECK-NEXT: 4a: b4 22 00 14 bne $2, $1, 44 +; CHECK-NEXT: 4e: 00 00 00 00 nop +; CHECK-NEXT: 52: 33 bd ff f8 addiu $sp, $sp, -8 +; CHECK-NEXT: 56: fb fd 00 00 sw $ra, 0($sp) +; CHECK-NEXT: 5a: 41 a1 00 01 lui $1, 1 +; CHECK-NEXT: 5e: 40 60 00 02 bal 8 +; CHECK-NEXT: 62: 30 21 04 5c addiu $1, $1, 1116 +; CHECK-NEXT: 66: 00 3f 09 50 addu $1, $ra, $1 +; CHECK-NEXT: 6a: ff fd 00 00 lw $ra, 0($sp) +; CHECK-NEXT: 6e: 00 01 0f 3c jr $1 +; CHECK-NEXT: 72: 33 bd 00 08 addiu $sp, $sp, 8 +; CHECK-NEXT: 76: 30 20 27 0f addiu $1, $zero, 9999 +; CHECK-NEXT: 7a: 94 22 00 14 beq $2, $1, 44 +; CHECK-NEXT: 7e: 00 00 00 00 nop +; CHECK-NEXT: 82: 33 bd ff f8 addiu $sp, $sp, -8 +; CHECK-NEXT: 86: fb fd 00 00 sw $ra, 0($sp) +; CHECK-NEXT: 8a: 41 a1 00 01 lui $1, 1 +; CHECK-NEXT: 8e: 40 60 00 02 bal 8 +; CHECK-NEXT: 92: 30 21 04 2c addiu $1, $1, 1068 +; CHECK-NEXT: 96: 00 3f 09 50 addu $1, $ra, $1 +; CHECK-NEXT: 9a: ff fd 00 00 lw $ra, 0($sp) +; CHECK-NEXT: 9e: 00 01 0f 3c jr $1 +; CHECK-NEXT: a2: 33 bd 00 08 addiu $sp, $sp, 8 + +; CHECK: 10466: 00 00 00 00 nop +; CHECK-NEXT: 1046a: 94 00 00 02 b 8 +; CHECK-NEXT: 1046e: 00 00 00 00 nop +; CHECK-NEXT: 10472: 33 bd ff f8 addiu $sp, $sp, -8 +; CHECK-NEXT: 10476: fb fd 00 00 sw $ra, 0($sp) +; CHECK-NEXT: 1047a: 41 a1 00 01 lui $1, 1 +; CHECK-NEXT: 1047e: 40 60 00 02 bal 8 +; CHECK-NEXT: 10482: 30 21 04 00 addiu $1, $1, 1024 +; CHECK-NEXT: 10486: 00 3f 09 50 
addu $1, $ra, $1 +; CHECK-NEXT: 1048a: ff fd 00 00 lw $ra, 0($sp) +; CHECK-NEXT: 1048e: 00 01 0f 3c jr $1 +; CHECK-NEXT: 10492: 33 bd 00 08 addiu $sp, $sp, 8 +; CHECK-NEXT: 10496: 94 00 00 02 b 8 + +@x = external global i32, align 4 + +define void @foo() { + %1 = load i32, i32* @x, align 4 + %2 = icmp sgt i32 %1, 0 + br i1 %2, label %la, label %lf + +la: + switch i32 %1, label %le [ + i32 9999, label %lb + i32 19999, label %lc + ] + +lb: + tail call void asm sideeffect ".space 0", ""() + br label %le + +lc: + tail call void asm sideeffect ".space 0", ""() + br label %le + +le: + tail call void asm sideeffect ".space 66500", ""() + br label %lg + +lf: + tail call void asm sideeffect ".space 0", ""() + br label %lg + +lg: + tail call void asm sideeffect ".space 0", ""() + br label %li + +li: + tail call void asm sideeffect ".space 0", ""() + ret void +} diff --git a/test/CodeGen/Mips/micromips-gcc-except-table.ll b/test/CodeGen/Mips/micromips-gcc-except-table.ll new file mode 100644 index 000000000000..38a76927e2a8 --- /dev/null +++ b/test/CodeGen/Mips/micromips-gcc-except-table.ll @@ -0,0 +1,37 @@ +; RUN: llc -mtriple=mips-linux-gnu -mcpu=mips32r2 -mattr=+micromips -O3 -filetype=obj < %s | llvm-objdump -s -j .gcc_except_table - | FileCheck %s + +; CHECK: Contents of section .gcc_except_table: +; CHECK-NEXT: 0000 ff9b1501 0c011100 00110e1f 011f1800 +; CHECK-NEXT: 0010 00010000 00000000 + +@_ZTIi = external constant i8* + +define dso_local i32 @main() local_unnamed_addr norecurse personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind + %0 = bitcast i8* %exception.i to i32* + store i32 5, i32* %0, align 16 + invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (i8** @_ZTIi to i8*), i8* null) noreturn + to label %.noexc unwind label %return + +.noexc: + unreachable + +return: + %1 = landingpad { i8*, i32 } + catch i8* null + %2 = extractvalue { i8*, i32 } %1, 0 + %3 = tail 
call i8* @__cxa_begin_catch(i8* %2) nounwind + tail call void @__cxa_end_catch() + ret i32 0 +} + +declare i32 @__gxx_personality_v0(...) + +declare i8* @__cxa_begin_catch(i8*) local_unnamed_addr + +declare void @__cxa_end_catch() local_unnamed_addr + +declare i8* @__cxa_allocate_exception(i32) local_unnamed_addr + +declare void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr diff --git a/test/CodeGen/Mips/micromips-mtc-mfc.ll b/test/CodeGen/Mips/micromips-mtc-mfc.ll new file mode 100644 index 000000000000..084c57ab5d26 --- /dev/null +++ b/test/CodeGen/Mips/micromips-mtc-mfc.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=mips -mcpu=mips32r2 -mattr=+micromips \ +; RUN: -show-mc-encoding < %s | FileCheck --check-prefix=MM2 %s +; RUN: llc -mtriple=mips -mcpu=mips32r6 -mattr=+micromips \ +; RUN: -show-mc-encoding < %s | FileCheck --check-prefix=MM6 %s + +define double @foo(double %a, double %b) { +; MM2-LABEL: foo: +; MM2: # %bb.0: # %entry +; MM2-NEXT: mov.d $f0, $f12 # encoding: [0x54,0x0c,0x20,0x7b] +; MM2-NEXT: mtc1 $zero, $f2 # encoding: [0x54,0x02,0x28,0x3b] +; MM2-NEXT: mthc1 $zero, $f2 # encoding: [0x54,0x02,0x38,0x3b] +; MM2-NEXT: c.ule.d $f12, $f2 # encoding: [0x54,0x4c,0x05,0xfc] +; MM2-NEXT: bc1t $BB0_2 # encoding: [0x43,0xa0,A,A] +; MM2-NEXT: # fixup A - offset: 0, value: ($BB0_2), kind: fixup_MICROMIPS_PC16_S1 +; MM2-NEXT: nop # encoding: [0x00,0x00,0x00,0x00] +; MM2-NEXT: # %bb.1: # %entry +; MM2-NEXT: j $BB0_2 # encoding: [0b110101AA,A,A,A] +; MM2-NEXT: # fixup A - offset: 0, value: ($BB0_2), kind: fixup_MICROMIPS_26_S1 +; MM2-NEXT: nop # encoding: [0x00,0x00,0x00,0x00] +; MM2-NEXT: $BB0_2: # %return +; MM2-NEXT: jrc $ra # encoding: [0x45,0xbf] +; +; MM6-LABEL: foo: +; MM6: # %bb.0: # %entry +; MM6-NEXT: mov.d $f0, $f12 # encoding: [0x46,0x20,0x60,0x06] +; MM6-NEXT: mtc1 $zero, $f1 # encoding: [0x54,0x01,0x28,0x3b] +; MM6-NEXT: mthc1 $zero, $f1 # encoding: [0x54,0x01,0x38,0x3b] +; 
MM6-NEXT: cmp.ule.d $f1, $f12, $f1 # encoding: [0x54,0x2c,0x09,0xd5] +; MM6-NEXT: mfc1 $2, $f1 # encoding: [0x54,0x41,0x20,0x3b] +; MM6-NEXT: andi16 $2, $2, 1 # encoding: [0x2d,0x21] +; MM6-NEXT: jrc $ra # encoding: [0x45,0xbf] +entry: + %cmp = fcmp ogt double %a, 0.000000e+00 + br i1 %cmp, label %if.end, label %if.else + +if.else: + br label %return + +if.end: + %mul = fmul double %a, 2.000000e+00 + br label %return + +return: + ret double %a +} + +define double @bar(double %x, double %y) { +; MM2-LABEL: bar: +; MM2: # %bb.0: # %entry +; MM2-NEXT: mov.d $f0, $f14 # encoding: [0x54,0x0e,0x20,0x7b] +; MM2-NEXT: c.olt.d $f12, $f14 # encoding: [0x55,0xcc,0x05,0x3c] +; MM2-NEXT: jr $ra # encoding: [0x00,0x1f,0x0f,0x3c] +; MM2-NEXT: movt.d $f0, $f12, $fcc0 # encoding: [0x54,0x0c,0x02,0x60] +; +; MM6-LABEL: bar: +; MM6: # %bb.0: # %entry +; MM6-NEXT: cmp.lt.d $f0, $f12, $f14 # encoding: [0x55,0xcc,0x01,0x15] +; MM6-NEXT: mfc1 $1, $f0 # encoding: [0x54,0x20,0x20,0x3b] +; MM6-NEXT: mtc1 $1, $f0 # encoding: [0x44,0x81,0x00,0x00] +; MM6-NEXT: sel.d $f0, $f14, $f12 # encoding: [0x55,0x8e,0x02,0xb8] +; MM6-NEXT: jrc $ra # encoding: [0x45,0xbf] +; FIXME: mtc1 is encoded as a regular non-microMIPS instruction +entry: + %z = fcmp olt double %x, %y + %r = select i1 %z, double %x, double %y + ret double %r +} diff --git a/test/CodeGen/Mips/shrink-wrap-buildpairf64-extractelementf64.mir b/test/CodeGen/Mips/shrink-wrap-buildpairf64-extractelementf64.mir new file mode 100644 index 000000000000..cb364de0e9bc --- /dev/null +++ b/test/CodeGen/Mips/shrink-wrap-buildpairf64-extractelementf64.mir @@ -0,0 +1,150 @@ +# RUN: llc -o - %s -mtriple=mips-unknown-linux-gnu -enable-shrink-wrap=true \ +# RUN: -start-before=shrink-wrap -stop-after=prologepilog | FileCheck %s + +--- | + declare void @foo() + define void @testBuildPairF64() { + ret void + } + define void @testBuildPairF64_64() { + ret void + } + define void @testBuildPairF64implicitSp() { + ret void + } + define void 
@testBuildPairF64_64implicitSp() { + ret void + } + define void @testExtractElementF64() { + ret void + } + define void @testExtractElementF64_64() { + ret void + } + define void @testExtractElementF64implicitSp() { + ret void + } + define void @testExtractElementF64_64implicitSp() { + ret void + } +... +--- +name: testBuildPairF64 +# CHECK-LABEL: name: testBuildPairF64 +# CHECK: bb.0 +# CHECK-NEXT: successors +# CHECK-NEXT: {{[[:space:]]$}} +# CHECK-NEXT: BuildPairF64 +body: | + bb.0: + $d0 = BuildPairF64 $zero, $zero + bb.1: + JAL @foo, implicit-def $ra + bb.2: + RetRA +... +--- +name: testBuildPairF64_64 +# CHECK-LABEL: name: testBuildPairF64_64 +# CHECK: bb.0 +# CHECK-NEXT: successors +# CHECK-NEXT: {{[[:space:]]$}} +# CHECK-NEXT: BuildPairF64_64 +body: | + bb.0: + $d0_64 = BuildPairF64_64 $zero, $zero + bb.1: + JAL @foo, implicit-def $ra + bb.2: + RetRA +... +--- +name: testBuildPairF64implicitSp +# CHECK-LABEL: name: testBuildPairF64implicitSp +# CHECK: bb.0 +# CHECK-NEXT: successors +# CHECK-NEXT: {{[[:space:]]$}} +# CHECK-NEXT: $sp = ADDiu $sp, -{{[0-9]+}} +body: | + bb.0: + $d0 = BuildPairF64 $zero, $zero, implicit $sp + bb.1: + JAL @foo, implicit-def $ra + bb.2: + RetRA +... +--- +name: testBuildPairF64_64implicitSp +# CHECK-LABEL: name: testBuildPairF64_64implicitSp +# CHECK: bb.0 +# CHECK-NEXT: successors +# CHECK-NEXT: {{[[:space:]]$}} +# CHECK-NEXT: $sp = ADDiu $sp, -{{[0-9]+}} +body: | + bb.0: + $d0_64 = BuildPairF64_64 $zero, $zero, implicit $sp + bb.1: + JAL @foo, implicit-def $ra + bb.2: + RetRA +... +--- +name: testExtractElementF64 +# CHECK-LABEL: name: testExtractElementF64 +# CHECK: bb.0 +# CHECK-NEXT: successors +# CHECK-NEXT: {{[[:space:]]$}} +# CHECK-NEXT: ExtractElementF64 +body: | + bb.0: + $at = ExtractElementF64 $d6, 1 + bb.1: + JAL @foo, implicit-def $ra + bb.2: + RetRA +... 
+--- +name: testExtractElementF64_64 +# CHECK-LABEL: name: testExtractElementF64_64 +# CHECK: bb.0 +# CHECK-NEXT: successors +# CHECK-NEXT: {{[[:space:]]$}} +# CHECK-NEXT: ExtractElementF64_64 +body: | + bb.0: + $at = ExtractElementF64_64 $d12_64, 1 + bb.1: + JAL @foo, implicit-def $ra + bb.2: + RetRA +... +--- +name: testExtractElementF64implicitSp +# CHECK-LABEL: name: testExtractElementF64implicitSp +# CHECK: bb.0 +# CHECK-NEXT: successors +# CHECK-NEXT: {{[[:space:]]$}} +# CHECK-NEXT: $sp = ADDiu $sp, -{{[0-9]+}} +body: | + bb.0: + $at = ExtractElementF64 $d6, 1, implicit $sp + bb.1: + JAL @foo, implicit-def $ra + bb.2: + RetRA +... +--- +name: testExtractElementF64_64implicitSp +# CHECK-LABEL: name: testExtractElementF64_64implicitSp +# CHECK: bb.0 +# CHECK-NEXT: successors +# CHECK-NEXT: {{[[:space:]]$}} +# CHECK-NEXT: $sp = ADDiu $sp, -{{[0-9]+}} +body: | + bb.0: + $at = ExtractElementF64_64 $d12_64, 1, implicit $sp + bb.1: + JAL @foo, implicit-def $ra + bb.2: + RetRA +... diff --git a/test/CodeGen/Mips/tls.ll b/test/CodeGen/Mips/tls.ll index 3ef9d63196c3..126cfea97287 100644 --- a/test/CodeGen/Mips/tls.ll +++ b/test/CodeGen/Mips/tls.ll @@ -48,14 +48,14 @@ entry: ; STATIC32-LABEL: f1: ; STATIC32: lui $[[R0:[0-9]+]], %tprel_hi(t1) ; STATIC32: addiu $[[R1:[0-9]+]], $[[R0]], %tprel_lo(t1) -; STATIC32: rdhwr $3, $29 +; STATIC32: rdhwr $3, $29{{$}} ; STATIC32: addu $[[R2:[0-9]+]], $3, $[[R1]] ; STATIC32: lw $2, 0($[[R2]]) ; STATIC64-LABEL: f1: ; STATIC64: lui $[[R0:[0-9]+]], %tprel_hi(t1) ; STATIC64: daddiu $[[R1:[0-9]+]], $[[R0]], %tprel_lo(t1) -; STATIC64: rdhwr $3, $29, 0 +; STATIC64: rdhwr $3, $29{{$}} ; STATIC64: daddu $[[R2:[0-9]+]], $3, $[[R0]] ; STATIC64: lw $2, 0($[[R2]]) } @@ -101,7 +101,7 @@ entry: ; STATIC32-LABEL: f2: ; STATIC32: lui $[[R0:[0-9]+]], %hi(__gnu_local_gp) ; STATIC32: addiu $[[GP:[0-9]+]], $[[R0]], %lo(__gnu_local_gp) -; STATIC32: rdhwr $3, $29 +; STATIC32: rdhwr $3, $29{{$}} ; STATIC32: lw $[[R0:[0-9]+]], %gottprel(t2)($[[GP]]) ; 
STATIC32: addu $[[R1:[0-9]+]], $3, $[[R0]] ; STATIC32: lw $2, 0($[[R1]]) @@ -109,7 +109,7 @@ entry: ; STATIC64-LABEL: f2: ; STATIC64: lui $[[R0:[0-9]+]], %hi(%neg(%gp_rel(f2))) ; STATIC64: daddiu $[[GP:[0-9]+]], $[[R0]], %lo(%neg(%gp_rel(f2))) -; STATIC64: rdhwr $3, $29 +; STATIC64: rdhwr $3, $29{{$}} ; STATIC64: ld $[[R0:[0-9]+]], %gottprel(t2)($[[GP]]) ; STATIC64: daddu $[[R1:[0-9]+]], $3, $[[R0]] ; STATIC64: lw $2, 0($[[R1]]) diff --git a/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll b/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll index e38c5beb80eb..643ec904ea38 100644 --- a/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll +++ b/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll @@ -1,35 +1,46 @@ ; RUN: llc < %s -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \ -; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-P8 +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefix=CHECK-P8 ; RUN: llc < %s -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ -; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-P9 +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefix=CHECK-P9 @a = external local_unnamed_addr global <4 x i32>, align 16 @pb = external local_unnamed_addr global float*, align 8 define void @testExpandPostRAPseudo(i32* nocapture readonly %ptr) { -; CHECK-P8-LABEL: testExpandPostRAPseudo: -; CHECK-P8: lxsiwax 34, 0, 3 -; CHECK-P8-NEXT: xxspltw 34, 34, 1 -; CHECK-P8-NEXT: stvx 2, 0, 4 -; CHECK-P8: #APP -; CHECK-P8-NEXT: #Clobber Rigisters -; CHECK-P8-NEXT: #NO_APP -; CHECK-P8-NEXT: lis 4, 1024 -; CHECK-P8-NEXT: lfiwax 0, 0, 3 -; CHECK-P8: stfsx 0, 3, 4 -; CHECK-P8-NEXT: blr - -; CHECK-P9-LABEL: testExpandPostRAPseudo: -; CHECK-P9: lxvwsx 0, 0, 3 -; CHECK-P9: stxvx 0, 0, 4 -; CHECK-P9: #APP -; CHECK-P9-NEXT: #Clobber Rigisters -; CHECK-P9-NEXT: #NO_APP -; CHECK-P9-NEXT: lis 4, 1024 -; CHECK-P9-NEXT: lfiwax 0, 0, 3 -; CHECK-P9: stfsx 0, 3, 4 -; 
CHECK-P9-NEXT: blr - +; CHECK-P8-LABEL: testExpandPostRAPseudo: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8: lfiwzx f0, 0, r3 +; CHECK-P8: ld r4, .LC0@toc@l(r4) +; CHECK-P8: xxpermdi vs0, f0, f0, 2 +; CHECK-P8: xxspltw v2, vs0, 3 +; CHECK-P8: stvx v2, 0, r4 +; CHECK-P8: lis r4, 1024 +; CHECK-P8: lfiwax f0, 0, r3 +; CHECK-P8: addis r3, r2, .LC1@toc@ha +; CHECK-P8: ld r3, .LC1@toc@l(r3) +; CHECK-P8: xscvsxdsp f0, f0 +; CHECK-P8: ld r3, 0(r3) +; CHECK-P8: stfsx f0, r3, r4 +; CHECK-P8: blr +; +; CHECK-P9-LABEL: testExpandPostRAPseudo: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9: lfiwzx f0, 0, r3 +; CHECK-P9: addis r4, r2, .LC0@toc@ha +; CHECK-P9: ld r4, .LC0@toc@l(r4) +; CHECK-P9: xxpermdi vs0, f0, f0, 2 +; CHECK-P9: xxspltw vs0, vs0, 3 +; CHECK-P9: stxvx vs0, 0, r4 +; CHECK-P9: lis r4, 1024 +; CHECK-P9: lfiwax f0, 0, r3 +; CHECK-P9: addis r3, r2, .LC1@toc@ha +; CHECK-P9: ld r3, .LC1@toc@l(r3) +; CHECK-P9: xscvsxdsp f0, f0 +; CHECK-P9: ld r3, 0(r3) +; CHECK-P9: stfsx f0, r3, r4 +; CHECK-P9: blr entry: %0 = load i32, i32* %ptr, align 4 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 diff --git a/test/CodeGen/PowerPC/build-vector-tests.ll b/test/CodeGen/PowerPC/build-vector-tests.ll index f074e2a0c0d0..d192bafca235 100644 --- a/test/CodeGen/PowerPC/build-vector-tests.ll +++ b/test/CodeGen/PowerPC/build-vector-tests.ll @@ -109,8 +109,8 @@ ;vector int spltRegVali(int val) { // ; return (vector int) val; // ;} // -;// P8: lxsiwax, xxspltw // -;// P9: lxvwsx // +;// P8: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw // +;// P9: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw // ;vector int spltMemVali(int *ptr) { // ; return (vector int)*ptr; // ;} // @@ -284,8 +284,8 @@ ;vector unsigned int spltRegValui(unsigned int val) { // ; return (vector unsigned int) val; // ;} // -;// P8: lxsiwax, xxspltw // -;// P9: lxvwsx // +;// P8: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw // +;// P9: (LE) lfiwzx, xxpermdi, xxspltw 
(BE): lfiwzx, xxsldwi, xxspltw // ;vector unsigned int spltMemValui(unsigned int *ptr) { // ; return (vector unsigned int)*ptr; // ;} // @@ -1202,15 +1202,21 @@ entry: ; P9LE-LABEL: spltMemVali ; P8BE-LABEL: spltMemVali ; P8LE-LABEL: spltMemVali -; P9BE: lxvwsx v2, 0, r3 +; P9BE: lfiwzx f0, 0, r3 +; P9BE: xxsldwi vs0, f0, f0, 1 +; P9BE: xxspltw v2, vs0, 0 ; P9BE: blr -; P9LE: lxvwsx v2, 0, r3 +; P9LE: lfiwzx f0, 0, r3 +; P9LE: xxpermdi vs0, f0, f0, 2 +; P9LE: xxspltw v2, vs0, 3 ; P9LE: blr -; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3 -; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1 +; P8BE: lfiwzx f0, 0, r3 +; P8BE: xxsldwi vs0, f0, f0, 1 +; P8BE: xxspltw v2, vs0, 0 ; P8BE: blr -; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3 -; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1 +; P8LE: lfiwzx f0, 0, r3 +; P8LE: xxpermdi vs0, f0, f0, 2 +; P8LE: xxspltw v2, vs0, 3 ; P8LE: blr } @@ -2338,15 +2344,21 @@ entry: ; P9LE-LABEL: spltMemValui ; P8BE-LABEL: spltMemValui ; P8LE-LABEL: spltMemValui -; P9BE: lxvwsx v2, 0, r3 +; P9BE: lfiwzx f0, 0, r3 +; P9BE: xxsldwi vs0, f0, f0, 1 +; P9BE: xxspltw v2, vs0, 0 ; P9BE: blr -; P9LE: lxvwsx v2, 0, r3 +; P9LE: lfiwzx f0, 0, r3 +; P9LE: xxpermdi vs0, f0, f0, 2 +; P9LE: xxspltw v2, vs0, 3 ; P9LE: blr -; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3 -; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1 +; P8BE: lfiwzx f0, 0, r3 +; P8BE: xxsldwi vs0, f0, f0, 1 +; P8BE: xxspltw v2, vs0, 0 ; P8BE: blr -; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3 -; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1 +; P8LE: lfiwzx f0, 0, r3 +; P8LE: xxpermdi vs0, f0, f0, 2 +; P8LE: xxspltw v2, vs0, 3 ; P8LE: blr } diff --git a/test/CodeGen/PowerPC/load-v4i8-improved.ll b/test/CodeGen/PowerPC/load-v4i8-improved.ll index 36f347222d5f..f1fa29960742 100644 --- a/test/CodeGen/PowerPC/load-v4i8-improved.ll +++ b/test/CodeGen/PowerPC/load-v4i8-improved.ll @@ -1,15 +1,27 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \ +; RUN: llc -verify-machineinstrs -mcpu=pwr8 
-mtriple=powerpc64le-unknown-linux-gnu < %s \ +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names | FileCheck --check-prefix=CHECK-LE \ ; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck \ +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s \ +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names | FileCheck \ ; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s define <16 x i8> @test(i32* %s, i32* %t) { +; CHECK-LE-LABEL: test: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-LE-NEXT: xxspltw v2, vs0, 3 +; CHECK-LE-NEXT: blr + +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfiwzx f0, 0, r3 +; CHECK-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-NEXT: xxspltw v2, vs0, 0 +; CHECK-NEXT: blr entry: %0 = bitcast i32* %s to <4 x i8>* %1 = load <4 x i8>, <4 x i8>* %0, align 4 %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32> ret <16 x i8> %2 -; CHECK-LABEL: test -; CHECK: lxsiwax 34, 0, 3 -; CHECK: xxspltw 34, 34, 1 } diff --git a/test/CodeGen/PowerPC/power9-moves-and-splats.ll b/test/CodeGen/PowerPC/power9-moves-and-splats.ll index fc676cc0885f..5ccba80fa4b2 100644 --- a/test/CodeGen/PowerPC/power9-moves-and-splats.ll +++ b/test/CodeGen/PowerPC/power9-moves-and-splats.ll @@ -1,47 +1,74 @@ -; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s -; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \ -; RUN: --check-prefix=CHECK-BE +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s --check-prefix=CHECK-BE @Globi = external global i32, align 4 @Globf = external global float, 
align 4 define <2 x i64> @test1(i64 %a, i64 %b) { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mtvsrdd v2, r4, r3 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test1: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mtvsrdd v2, r3, r4 +; CHECK-BE-NEXT: blr entry: ; The FIXME below is due to the lowering for BUILD_VECTOR needing a re-vamp ; which will happen in a subsequent patch. -; CHECK-LABEL: test1 -; CHECK: mtvsrdd 34, 4, 3 -; CHECK-BE-LABEL: test1 -; CHECK-BE: mtvsrdd 34, 3, 4 %vecins = insertelement <2 x i64> undef, i64 %a, i32 0 %vecins1 = insertelement <2 x i64> %vecins, i64 %b, i32 1 ret <2 x i64> %vecins1 } define i64 @test2(<2 x i64> %a) { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrld r3, v2 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mfvsrd r3, v2 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test2 -; CHECK: mfvsrld 3, 34 %0 = extractelement <2 x i64> %a, i32 0 ret i64 %0 } define i64 @test3(<2 x i64> %a) { +; CHECK-LABEL: test3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test3: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mfvsrld r3, v2 +; CHECK-BE-NEXT: blr entry: -; CHECK-BE-LABEL: test3 -; CHECK-BE: mfvsrld 3, 34 %0 = extractelement <2 x i64> %a, i32 1 ret i64 %0 } define <4 x i32> @test4(i32* nocapture readonly %in) { +; CHECK-LABEL: test4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfiwzx f0, 0, r3 +; CHECK-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-NEXT: xxspltw v2, vs0, 3 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test4: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-BE-NEXT: xxspltw v2, vs0, 0 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test4 -; CHECK: lxvwsx 34, 0, 3 -; CHECK-NOT: xxspltw -; CHECK-BE-LABEL: test4 -; CHECK-BE: lxvwsx 34, 0, 3 -; CHECK-BE-NOT: xxspltw %0 = load i32, i32* %in, align 4 %splat.splatinsert = insertelement 
<4 x i32> undef, i32 %0, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer @@ -49,13 +76,20 @@ entry: } define <4 x float> @test5(float* nocapture readonly %in) { +; CHECK-LABEL: test5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfiwzx f0, 0, r3 +; CHECK-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-NEXT: xxspltw v2, vs0, 3 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test5: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-BE-NEXT: xxspltw v2, vs0, 0 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test5 -; CHECK: lxvwsx 34, 0, 3 -; CHECK-NOT: xxspltw -; CHECK-BE-LABEL: test5 -; CHECK-BE: lxvwsx 34, 0, 3 -; CHECK-BE-NOT: xxspltw %0 = load float, float* %in, align 4 %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0 %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer @@ -63,17 +97,24 @@ entry: } define <4 x i32> @test6() { +; CHECK-LABEL: test6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-NEXT: lfiwzx f0, 0, r3 +; CHECK-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-NEXT: xxspltw v2, vs0, 3 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test6: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-BE-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-BE-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-BE-NEXT: xxspltw v2, vs0, 0 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test6 -; CHECK: addis -; CHECK: ld [[TOC:[0-9]+]], .LC0 -; CHECK: lxvwsx 34, 0, 3 -; CHECK-NOT: xxspltw -; CHECK-BE-LABEL: test6 -; CHECK-BE: addis -; CHECK-BE: ld [[TOC:[0-9]+]], .LC0 -; CHECK-BE: lxvwsx 34, 0, 3 -; CHECK-BE-NOT: xxspltw %0 = load i32, i32* @Globi, align 4 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> 
zeroinitializer @@ -81,17 +122,24 @@ entry: } define <4 x float> @test7() { +; CHECK-LABEL: test7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis r3, r2, .LC1@toc@ha +; CHECK-NEXT: ld r3, .LC1@toc@l(r3) +; CHECK-NEXT: lfiwzx f0, 0, r3 +; CHECK-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-NEXT: xxspltw v2, vs0, 3 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test7: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: addis r3, r2, .LC1@toc@ha +; CHECK-BE-NEXT: ld r3, .LC1@toc@l(r3) +; CHECK-BE-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-BE-NEXT: xxspltw v2, vs0, 0 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test7 -; CHECK: addis -; CHECK: ld [[TOC:[0-9]+]], .LC1 -; CHECK: lxvwsx 34, 0, 3 -; CHECK-NOT: xxspltw -; CHECK-BE-LABEL: test7 -; CHECK-BE: addis -; CHECK-BE: ld [[TOC:[0-9]+]], .LC1 -; CHECK-BE: lxvwsx 34, 0, 3 -; CHECK-BE-NOT: xxspltw %0 = load float, float* @Globf, align 4 %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0 %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer @@ -99,76 +147,120 @@ entry: } define <16 x i8> @test8() { +; CHECK-LABEL: test8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v2, v2, v2 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test8: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxlxor v2, v2, v2 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test8 -; CHECK: xxlxor 34, 34, 34 -; CHECK-BE-LABEL: test8 -; CHECK-BE: xxlxor 34, 34, 34 ret <16 x i8> zeroinitializer } define <16 x i8> @test9() { +; CHECK-LABEL: test9: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v2, 1 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test9: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxspltib v2, 1 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test9 -; CHECK: xxspltib 34, 1 -; CHECK-BE-LABEL: test9 -; CHECK-BE: xxspltib 34, 1 ret <16 x i8> } define <16 x i8> @test10() { +; CHECK-LABEL: test10: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v2, 127 +; 
CHECK-NEXT: blr + +; CHECK-BE-LABEL: test10: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxspltib v2, 127 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test10 -; CHECK: xxspltib 34, 127 -; CHECK-BE-LABEL: test10 -; CHECK-BE: xxspltib 34, 127 ret <16 x i8> } define <16 x i8> @test11() { +; CHECK-LABEL: test11: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v2, 128 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test11: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxspltib v2, 128 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test11 -; CHECK: xxspltib 34, 128 -; CHECK-BE-LABEL: test11 -; CHECK-BE: xxspltib 34, 128 ret <16 x i8> } define <16 x i8> @test12() { +; CHECK-LABEL: test12: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v2, 255 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test12: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxspltib v2, 255 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test12 -; CHECK: xxspltib 34, 255 -; CHECK-BE-LABEL: test12 -; CHECK-BE: xxspltib 34, 255 ret <16 x i8> } define <16 x i8> @test13() { +; CHECK-LABEL: test13: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v2, 129 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test13: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxspltib v2, 129 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test13 -; CHECK: xxspltib 34, 129 -; CHECK-BE-LABEL: test13 -; CHECK-BE: xxspltib 34, 129 ret <16 x i8> } define <16 x i8> @test13E127() { +; CHECK-LABEL: test13E127: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v2, 200 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test13E127: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxspltib v2, 200 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test13E127 -; CHECK: xxspltib 34, 200 -; CHECK-BE-LABEL: test13E127 -; CHECK-BE: xxspltib 34, 200 ret <16 x i8> } define <4 x i32> @test14(<4 x i32> %a, i32* nocapture readonly %b) { +; CHECK-LABEL: test14: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lwz r3, 0(r5) +; CHECK-NEXT: mtvsrws v2, r3 +; CHECK-NEXT: addi r3, 
r3, 5 +; CHECK-NEXT: stw r3, 0(r5) +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test14: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lwz r3, 0(r5) +; CHECK-BE-NEXT: mtvsrws v2, r3 +; CHECK-BE-NEXT: addi r3, r3, 5 +; CHECK-BE-NEXT: stw r3, 0(r5) +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test14 -; CHECK: lwz [[LD:[0-9]+]], -; CHECK: mtvsrws 34, [[LD]] -; CHECK-BE-LABEL: test14 -; CHECK-BE: lwz [[LD:[0-9]+]], -; CHECK-BE: mtvsrws 34, [[LD]] %0 = load i32, i32* %b, align 4 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer diff --git a/test/CodeGen/PowerPC/pr38087.ll b/test/CodeGen/PowerPC/pr38087.ll index af8704f7d708..2736ffa723c1 100644 --- a/test/CodeGen/PowerPC/pr38087.ll +++ b/test/CodeGen/PowerPC/pr38087.ll @@ -11,9 +11,8 @@ declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #0 define void @draw_llvm_vs_variant0() { ; CHECK-LABEL: draw_llvm_vs_variant0: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ldx r3, 0, r3 -; CHECK-NEXT: mtvsrd f0, r3 -; CHECK-NEXT: xxswapd v2, vs0 +; CHECK-NEXT: lfd f0, 0(r3) +; CHECK-NEXT: xxpermdi v2, f0, f0, 2 ; CHECK-NEXT: vmrglh v2, v2, v2 ; CHECK-NEXT: vextsh2w v2, v2 ; CHECK-NEXT: xvcvsxwsp vs0, v2 diff --git a/test/CodeGen/PowerPC/qpx-load-splat.ll b/test/CodeGen/PowerPC/qpx-load-splat.ll index 034961815178..1afd27262ba8 100644 --- a/test/CodeGen/PowerPC/qpx-load-splat.ll +++ b/test/CodeGen/PowerPC/qpx-load-splat.ll @@ -1,35 +1,44 @@ -; RUN: llc -verify-machineinstrs < %s | FileCheck %s -target datalayout = "E-m:e-i64:64-n32:64" -target triple = "powerpc64-bgq-linux" +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s ; Function Attrs: norecurse nounwind readonly define <4 x double> @foo(double* nocapture readonly %a) #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvdsx v2, 0, r3 +; CHECK-NEXT: 
vmr v3, v2 +; CHECK-NEXT: blr entry: %0 = load double, double* %a, align 8 %vecinit.i = insertelement <4 x double> undef, double %0, i32 0 %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer ret <4 x double> %shuffle.i - -; CHECK-LABEL: @foo -; CHECK: lfd 1, 0(3) -; CHECK: blr } define <4 x double> @foox(double* nocapture readonly %a, i64 %idx) #0 { +; CHECK-LABEL: foox: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sldi r4, r4, 3 +; CHECK-NEXT: lxvdsx v2, r3, r4 +; CHECK-NEXT: vmr v3, v2 +; CHECK-NEXT: blr entry: %p = getelementptr double, double* %a, i64 %idx %0 = load double, double* %p, align 8 %vecinit.i = insertelement <4 x double> undef, double %0, i32 0 %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer ret <4 x double> %shuffle.i - -; CHECK-LABEL: @foox -; CHECK: sldi [[REG1:[0-9]+]], 4, 3 -; CHECK: lfdx 1, 3, [[REG1]] -; CHECK: blr } define <4 x double> @fooxu(double* nocapture readonly %a, i64 %idx, double** %pptr) #0 { +; CHECK-LABEL: fooxu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sldi r4, r4, 3 +; CHECK-NEXT: lfdux f0, r3, r4 +; CHECK-NEXT: xxspltd v2, vs0, 0 +; CHECK-NEXT: std r3, 0(r5) +; CHECK-NEXT: vmr v3, v2 +; CHECK-NEXT: blr entry: %p = getelementptr double, double* %a, i64 %idx %0 = load double, double* %p, align 8 @@ -37,39 +46,36 @@ entry: %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer store double* %p, double** %pptr, align 8 ret <4 x double> %shuffle.i - -; CHECK-LABEL: @foox -; CHECK: sldi [[REG1:[0-9]+]], 4, 3 -; CHECK: lfdux 1, 3, [[REG1]] -; CHECK: std 3, 0(5) -; CHECK: blr } define <4 x float> @foof(float* nocapture readonly %a) #0 { +; CHECK-LABEL: foof: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfiwzx f0, 0, r3 +; CHECK-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-NEXT: xxspltw v2, vs0, 3 +; CHECK-NEXT: blr entry: %0 = load float, float* %a, align 4 %vecinit.i = insertelement <4 x float> undef, 
float %0, i32 0 %shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer ret <4 x float> %shuffle.i - -; CHECK-LABEL: @foof -; CHECK: lfs 1, 0(3) -; CHECK: blr } define <4 x float> @foofx(float* nocapture readonly %a, i64 %idx) #0 { +; CHECK-LABEL: foofx: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sldi r4, r4, 2 +; CHECK-NEXT: lfiwzx f0, r3, r4 +; CHECK-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-NEXT: xxspltw v2, vs0, 3 +; CHECK-NEXT: blr entry: %p = getelementptr float, float* %a, i64 %idx %0 = load float, float* %p, align 4 %vecinit.i = insertelement <4 x float> undef, float %0, i32 0 %shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer ret <4 x float> %shuffle.i - -; CHECK-LABEL: @foofx -; CHECK: sldi [[REG1:[0-9]+]], 4, 2 -; CHECK: lfsx 1, 3, [[REG1]] -; CHECK: blr } -attributes #0 = { norecurse nounwind readonly "target-cpu"="a2q" "target-features"="+qpx,-altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" } diff --git a/test/CodeGen/PowerPC/scalar_vector_test_1.ll b/test/CodeGen/PowerPC/scalar_vector_test_1.ll new file mode 100644 index 000000000000..1b5ddff0374c --- /dev/null +++ b/test/CodeGen/PowerPC/scalar_vector_test_1.ll @@ -0,0 +1,292 @@ +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE + +; Function Attrs: norecurse 
nounwind readonly +define <2 x i64> @s2v_test1(i64* nocapture readonly %int64, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test1: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 0(r3) +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test1: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 0(r3) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr +entry: + %0 = load i64, i64* %int64, align 8 + %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test2(i64* nocapture readonly %int64, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test2: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 8(r3) +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test2: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 8(r3) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i64, i64* %int64, i64 1 + %0 = load i64, i64* %arrayidx, align 8 + %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test3(i64* nocapture readonly %int64, <2 x i64> %vec, i32 signext %Idx) { +; P9LE-LABEL: s2v_test3: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r7, 3 +; P9LE-NEXT: lfdx f0, r3, r4 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test3: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r7, 3 +; P9BE-NEXT: lfdx f0, r3, r4 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds i64, i64* %int64, i64 %idxprom + %0 = load i64, i64* %arrayidx, align 8 + %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> 
@s2v_test4(i64* nocapture readonly %int64, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test4: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 8(r3) +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test4: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 8(r3) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i64, i64* %int64, i64 1 + %0 = load i64, i64* %arrayidx, align 8 + %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test5(<2 x i64> %vec, i64* nocapture readonly %ptr1) { +; P9LE-LABEL: s2v_test5: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 0(r5) +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test5: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 0(r5) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr +entry: + %0 = load i64, i64* %ptr1, align 8 + %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f1(double* nocapture readonly %f64, <2 x double> %vec) { +; P9LE-LABEL: s2v_test_f1: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 0(r3) +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f1: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 0(r3) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f1: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfdx f0, 0, r3 +; P8LE-NEXT: xxspltd vs0, vs0, 0 +; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f1: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfdx f0, 0, r3 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %0 = load double, double* %f64, align 8 + %vecins = insertelement <2 x double> 
%vec, double %0, i32 0 + ret <2 x double> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f2(double* nocapture readonly %f64, <2 x double> %vec) { +; P9LE-LABEL: s2v_test_f2: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 8(r3) +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f2: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 8(r3) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f2: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 8 +; P8LE-NEXT: lfdx f0, 0, r3 +; P8LE-NEXT: xxspltd vs0, vs0, 0 +; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f2: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 8 +; P8BE-NEXT: lfdx f0, 0, r3 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds double, double* %f64, i64 1 + %0 = load double, double* %arrayidx, align 8 + %vecins = insertelement <2 x double> %vec, double %0, i32 0 + ret <2 x double> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f3(double* nocapture readonly %f64, <2 x double> %vec, i32 signext %Idx) { +; P9LE-LABEL: s2v_test_f3: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r7, 3 +; P9LE-NEXT: lfdx f0, r3, r4 +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f3: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r7, 3 +; P9BE-NEXT: lfdx f0, r3, r4 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f3: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r7, 3 +; P8LE-NEXT: lfdx f0, r3, r4 +; P8LE-NEXT: xxspltd vs0, vs0, 0 +; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f3: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r7, 3 +; P8BE-NEXT: lfdx f0, r3, r4 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; 
P8BE-NEXT: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds double, double* %f64, i64 %idxprom + %0 = load double, double* %arrayidx, align 8 + %vecins = insertelement <2 x double> %vec, double %0, i32 0 + ret <2 x double> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f4(double* nocapture readonly %f64, <2 x double> %vec) { +; P9LE-LABEL: s2v_test_f4: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 8(r3) +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f4: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 8(r3) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f4: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 8 +; P8LE-NEXT: lfdx f0, 0, r3 +; P8LE-NEXT: xxspltd vs0, vs0, 0 +; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f4: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 8 +; P8BE-NEXT: lfdx f0, 0, r3 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds double, double* %f64, i64 1 + %0 = load double, double* %arrayidx, align 8 + %vecins = insertelement <2 x double> %vec, double %0, i32 0 + ret <2 x double> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f5(<2 x double> %vec, double* nocapture readonly %ptr1) { +; P9LE-LABEL: s2v_test_f5: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 0(r5) +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f5: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 0(r5) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f5: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfdx f0, 0, r5 +; P8LE-NEXT: xxspltd vs0, vs0, 0 +; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f5: +; P8BE: # %bb.0: # %entry 
+; P8BE-NEXT: lfdx f0, 0, r5 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %0 = load double, double* %ptr1, align 8 + %vecins = insertelement <2 x double> %vec, double %0, i32 0 + ret <2 x double> %vecins +} + diff --git a/test/CodeGen/PowerPC/scalar_vector_test_2.ll b/test/CodeGen/PowerPC/scalar_vector_test_2.ll new file mode 100644 index 000000000000..da1b8bcaa3d0 --- /dev/null +++ b/test/CodeGen/PowerPC/scalar_vector_test_2.ll @@ -0,0 +1,118 @@ +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE + +define void @test_liwzx1(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) { +; P9LE-LABEL: test_liwzx1: +; P9LE: # %bb.0: +; P9LE-NEXT: lfiwzx f0, 0, r3 +; P9LE-NEXT: lfiwzx f1, 0, r4 +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi vs1, f1, f1, 2 +; P9LE-NEXT: xvaddsp vs0, vs0, vs1 +; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P9LE-NEXT: xscvspdpn f0, vs0 +; P9LE-NEXT: stfs f0, 0(r5) +; P9LE-NEXT: blr + +; P9BE-LABEL: test_liwzx1: +; P9BE: # %bb.0: +; P9BE-NEXT: lfiwzx f0, 0, r3 +; P9BE-NEXT: lfiwzx f1, 0, r4 +; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P9BE-NEXT: xxsldwi vs1, f1, f1, 1 +; P9BE-NEXT: xvaddsp vs0, vs0, vs1 +; P9BE-NEXT: xscvspdpn f0, vs0 +; P9BE-NEXT: stfs f0, 0(r5) +; P9BE-NEXT: blr + +; P8LE-LABEL: test_liwzx1: +; P8LE: # %bb.0: +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: lfiwzx 
f1, 0, r4 +; P8LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P8LE-NEXT: xxpermdi vs1, f1, f1, 2 +; P8LE-NEXT: xvaddsp vs0, vs0, vs1 +; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P8LE-NEXT: xscvspdpn f0, vs0 +; P8LE-NEXT: stfsx f0, 0, r5 +; P8LE-NEXT: blr + +; P8BE-LABEL: test_liwzx1: +; P8BE: # %bb.0: +; P8BE-NEXT: lfiwzx f0, 0, r3 +; P8BE-NEXT: lfiwzx f1, 0, r4 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE-NEXT: xxsldwi vs1, f1, f1, 1 +; P8BE-NEXT: xvaddsp vs0, vs0, vs1 +; P8BE-NEXT: xscvspdpn f0, vs0 +; P8BE-NEXT: stfsx f0, 0, r5 +; P8BE-NEXT: blr + %a = load <1 x float>, <1 x float>* %A + %b = load <1 x float>, <1 x float>* %B + %X = fadd <1 x float> %a, %b + store <1 x float> %X, <1 x float>* %C + ret void +} + +define <1 x float>* @test_liwzx2(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) { +; P9LE-LABEL: test_liwzx2: +; P9LE: # %bb.0: +; P9LE-NEXT: lfiwzx f0, 0, r3 +; P9LE-NEXT: lfiwzx f1, 0, r4 +; P9LE-NEXT: mr r3, r5 +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi vs1, f1, f1, 2 +; P9LE-NEXT: xvsubsp vs0, vs0, vs1 +; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P9LE-NEXT: xscvspdpn f0, vs0 +; P9LE-NEXT: stfs f0, 0(r5) +; P9LE-NEXT: blr + +; P9BE-LABEL: test_liwzx2: +; P9BE: # %bb.0: +; P9BE-NEXT: lfiwzx f0, 0, r3 +; P9BE-NEXT: lfiwzx f1, 0, r4 +; P9BE-NEXT: mr r3, r5 +; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P9BE-NEXT: xxsldwi vs1, f1, f1, 1 +; P9BE-NEXT: xvsubsp vs0, vs0, vs1 +; P9BE-NEXT: xscvspdpn f0, vs0 +; P9BE-NEXT: stfs f0, 0(r5) +; P9BE-NEXT: blr + +; P8LE-LABEL: test_liwzx2: +; P8LE: # %bb.0: +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: lfiwzx f1, 0, r4 +; P8LE-NEXT: mr r3, r5 +; P8LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P8LE-NEXT: xxpermdi vs1, f1, f1, 2 +; P8LE-NEXT: xvsubsp vs0, vs0, vs1 +; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P8LE-NEXT: xscvspdpn f0, vs0 +; P8LE-NEXT: stfsx f0, 0, r5 +; P8LE-NEXT: blr + +; P8BE-LABEL: test_liwzx2: +; P8BE: # %bb.0: +; P8BE-NEXT: lfiwzx f0, 0, r3 +; P8BE-NEXT: lfiwzx f1, 0, r4 +; P8BE-NEXT: mr r3, r5 +; P8BE-NEXT: 
xxsldwi vs0, f0, f0, 1 +; P8BE-NEXT: xxsldwi vs1, f1, f1, 1 +; P8BE-NEXT: xvsubsp vs0, vs0, vs1 +; P8BE-NEXT: xscvspdpn f0, vs0 +; P8BE-NEXT: stfsx f0, 0, r5 +; P8BE-NEXT: blr + %a = load <1 x float>, <1 x float>* %A + %b = load <1 x float>, <1 x float>* %B + %X = fsub <1 x float> %a, %b + store <1 x float> %X, <1 x float>* %C + ret <1 x float>* %C +} diff --git a/test/CodeGen/PowerPC/scalar_vector_test_3.ll b/test/CodeGen/PowerPC/scalar_vector_test_3.ll new file mode 100644 index 000000000000..c63044a79a5a --- /dev/null +++ b/test/CodeGen/PowerPC/scalar_vector_test_3.ll @@ -0,0 +1,265 @@ +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test1(i32* nocapture readonly %int32, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test1: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfiwax f0, 0, r3 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test1: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfiwax f0, 0, r3 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test1: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwax f0, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test1: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: 
lfiwax f0, 0, r3 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %0 = load i32, i32* %int32, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test2(i32* nocapture readonly %int32, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test2: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r3, r3, 4 +; P9LE-NEXT: lfiwax f0, 0, r3 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test2: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addi r3, r3, 4 +; P9BE-NEXT: lfiwax f0, 0, r3 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test2: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 4 +; P8LE-NEXT: lfiwax f0, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test2: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 4 +; P8BE-NEXT: lfiwax f0, 0, r3 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1 + %0 = load i32, i32* %arrayidx, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test3(i32* nocapture readonly %int32, <2 x i64> %vec, i32 signext %Idx) { +; P9LE-LABEL: s2v_test3: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r7, 2 +; P9LE-NEXT: lfiwax f0, r3, r4 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test3: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r7, 2 +; P9BE-NEXT: lfiwax f0, r3, r4 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test3: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r7, 2 +; P8LE-NEXT: lfiwax f0, r3, r4 +; P8LE-NEXT: 
xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test3: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r7, 2 +; P8BE-NEXT: lfiwax f0, r3, r4 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test4(i32* nocapture readonly %int32, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test4: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r3, r3, 4 +; P9LE-NEXT: lfiwax f0, 0, r3 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test4: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addi r3, r3, 4 +; P9BE-NEXT: lfiwax f0, 0, r3 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test4: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 4 +; P8LE-NEXT: lfiwax f0, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test4: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 4 +; P8BE-NEXT: lfiwax f0, 0, r3 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1 + %0 = load i32, i32* %arrayidx, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test5(<2 x i64> %vec, i32* nocapture readonly %ptr1) { +; P9LE-LABEL: s2v_test5: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfiwax f0, 0, r5 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test5: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: 
lfiwax f0, 0, r5 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test5: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwax f0, 0, r5 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test5: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfiwax f0, 0, r5 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %0 = load i32, i32* %ptr1, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test6(i32* nocapture readonly %ptr) { +; P9LE-LABEL: s2v_test6: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfiwax f0, 0, r3 +; P9LE-NEXT: xxpermdi v2, f0, f0, 2 +; P9LE-NEXT: xxspltd v2, v2, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test6: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfiwax f0, 0, r3 +; P9BE-NEXT: xxspltd v2, vs0, 0 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test6: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwax f0, 0, r3 +; P8LE-NEXT: xxpermdi v2, f0, f0, 2 +; P8LE-NEXT: xxspltd v2, v2, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test6: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfiwax f0, 0, r3 +; P8BE-NEXT: xxspltd v2, vs0, 0 +; P8BE-NEXT: blr +entry: + %0 = load i32, i32* %ptr, align 4 + %conv = sext i32 %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test7(i32* nocapture readonly %ptr) { +; P9LE-LABEL: s2v_test7: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfiwax f0, 0, r3 +; P9LE-NEXT: xxpermdi v2, f0, f0, 2 +; P9LE-NEXT: xxspltd v2, v2, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test7: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfiwax f0, 0, r3 +; P9BE-NEXT: xxspltd v2, vs0, 0 +; P9BE-NEXT: blr + +; P8LE-LABEL: 
s2v_test7: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwax f0, 0, r3 +; P8LE-NEXT: xxpermdi v2, f0, f0, 2 +; P8LE-NEXT: xxspltd v2, v2, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test7: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfiwax f0, 0, r3 +; P8BE-NEXT: xxspltd v2, vs0, 0 +; P8BE-NEXT: blr +entry: + %0 = load i32, i32* %ptr, align 4 + %conv = sext i32 %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +} + diff --git a/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/test/CodeGen/PowerPC/scalar_vector_test_4.ll new file mode 100644 index 000000000000..aaaf0ba60f1d --- /dev/null +++ b/test/CodeGen/PowerPC/scalar_vector_test_4.ll @@ -0,0 +1,341 @@ +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) { +; P8LE-LABEL: s2v_test1: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: addis r4, r2, .LCPI0_0@toc@ha +; P8LE-NEXT: addi r3, r4, .LCPI0_0@toc@l +; P8LE-NEXT: lvx v4, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vperm v2, v3, v2, v4 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test1: +; P8BE: # %bb.0: # %entry +; 
P8BE: lfiwzx f0, 0, r3 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE: xxsldwi vs0, v2, vs0, 1 +; P8BE: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: blr +entry: + %0 = load i32, i32* %int32, align 4 + %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 + ret <4 x i32> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec) { +; P8LE-LABEL: s2v_test2: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 4 +; P8LE-NEXT: addis r4, r2, .LCPI1_0@toc@ha +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: addi r3, r4, .LCPI1_0@toc@l +; P8LE-NEXT: lvx v4, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vperm v2, v3, v2, v4 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test2: +; P8BE: # %bb.0: # %entry +; P8BE: addi r3, r3, 4 +; P8BE: lfiwzx f0, 0, r3 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE: xxsldwi vs0, v2, vs0, 1 +; P8BE: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1 + %0 = load i32, i32* %arrayidx, align 4 + %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 + ret <4 x i32> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32 signext %Idx) { +; P8LE-LABEL: s2v_test3: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r5, r7, 2 +; P8LE-NEXT: addis r4, r2, .LCPI2_0@toc@ha +; P8LE-NEXT: lfiwzx f0, r3, r5 +; P8LE-NEXT: addi r3, r4, .LCPI2_0@toc@l +; P8LE-NEXT: lvx v4, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vperm v2, v3, v2, v4 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test3: +; P8BE: # %bb.0: # %entry +; P8BE: sldi r4, r7, 2 +; P8BE: lfiwzx f0, r3, r4 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE: xxsldwi vs0, v2, vs0, 1 +; P8BE: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %vecins = 
insertelement <4 x i32> %vec, i32 %0, i32 0 + ret <4 x i32> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec) { +; P8LE-LABEL: s2v_test4: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 4 +; P8LE-NEXT: addis r4, r2, .LCPI3_0@toc@ha +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: addi r3, r4, .LCPI3_0@toc@l +; P8LE-NEXT: lvx v4, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vperm v2, v3, v2, v4 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test4: +; P8BE: # %bb.0: # %entry +; P8BE: addi r3, r3, 4 +; P8BE: lfiwzx f0, 0, r3 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE: xxsldwi vs0, v2, vs0, 1 +; P8BE: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1 + %0 = load i32, i32* %arrayidx, align 4 + %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 + ret <4 x i32> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1) { +; P8LE-LABEL: s2v_test5: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwzx f0, 0, r5 +; P8LE-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI4_0@toc@l +; P8LE-NEXT: lvx v4, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vperm v2, v3, v2, v4 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test5: +; P8BE: # %bb.0: # %entry +; P8BE: lfiwzx f0, 0, r5 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE: xxsldwi vs0, v2, vs0, 1 +; P8BE: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: blr +entry: + %0 = load i32, i32* %ptr1, align 4 + %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 + ret <4 x i32> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec) { +; P8LE-LABEL: s2v_test_f1: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: addis r4, r2, .LCPI5_0@toc@ha +; P8LE-NEXT: addi r3, r4, .LCPI5_0@toc@l +; P8LE-NEXT: 
lvx v4, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vperm v2, v3, v2, v4 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f1: +; P8BE: # %bb.0: # %entry +; P8BE: lfiwzx f0, 0, r3 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE: xxsldwi vs0, v2, vs0, 1 +; P8BE: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: blr +entry: + %0 = load float, float* %f64, align 4 + %vecins = insertelement <4 x float> %vec, float %0, i32 0 + ret <4 x float> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec) { +; P9LE-LABEL: s2v_test_f2: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r3, r3, 4 +; P9LE-NEXT: xxspltw v2, v2, 2 +; P9LE-NEXT: lfiwzx f0, 0, r3 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f2: +; P9BE: # %bb.0: # %entry +; P9BE: addi r3, r3, 4 +; P9BE: xxspltw v2, v2, 1 +; P9BE: lfiwzx f0, 0, r3 +; P9BE-NEXT: xxsldwi v3, f0, f0, 1 +; P9BE: vmrghw v2, v3, v2 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f2: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 4 +; P8LE-NEXT: xxspltw v2, v2, 2 +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f2: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 4 +; P8BE-NEXT: xxspltw v2, v2, 1 +; P8BE-NEXT: lfiwzx f0, 0, r3 +; P8BE-NEXT: xxsldwi v3, f0, f0, 1 +; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds float, float* %f64, i64 1 + %0 = load float, float* %arrayidx, align 8 + %vecins = insertelement <2 x float> %vec, float %0, i32 0 + ret <2 x float> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec, i32 signext %Idx) { +; P9LE-LABEL: s2v_test_f3: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r7, 2 +; P9LE-NEXT: xxspltw v2, v2, 2 +; P9LE-NEXT: lfiwzx f0, 
r3, r4 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f3: +; P9BE: # %bb.0: # %entry +; P9BE: sldi r4, r7, 2 +; P9BE: xxspltw v2, v2, 1 +; P9BE: lfiwzx f0, r3, r4 +; P9BE-NEXT: xxsldwi v3, f0, f0, 1 +; P9BE: vmrghw v2, v3, v2 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f3: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r7, 2 +; P8LE-NEXT: xxspltw v2, v2, 2 +; P8LE-NEXT: lfiwzx f0, r3, r4 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f3: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r7, 2 +; P8BE-NEXT: xxspltw v2, v2, 1 +; P8BE-NEXT: lfiwzx f0, r3, r4 +; P8BE-NEXT: xxsldwi v3, f0, f0, 1 +; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds float, float* %f64, i64 %idxprom + %0 = load float, float* %arrayidx, align 8 + %vecins = insertelement <2 x float> %vec, float %0, i32 0 + ret <2 x float> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec) { +; P9LE-LABEL: s2v_test_f4: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r3, r3, 4 +; P9LE-NEXT: xxspltw v2, v2, 2 +; P9LE-NEXT: lfiwzx f0, 0, r3 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f4: +; P9BE: # %bb.0: # %entry +; P9BE: addi r3, r3, 4 +; P9BE: xxspltw v2, v2, 1 +; P9BE: lfiwzx f0, 0, r3 +; P9BE-NEXT: xxsldwi v3, f0, f0, 1 +; P9BE: vmrghw v2, v3, v2 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f4: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 4 +; P8LE-NEXT: xxspltw v2, v2, 2 +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f4: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 4 +; P8BE-NEXT: xxspltw v2, v2, 1 +; P8BE-NEXT: lfiwzx f0, 0, r3 +; 
P8BE-NEXT: xxsldwi v3, f0, f0, 1 +; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds float, float* %f64, i64 1 + %0 = load float, float* %arrayidx, align 8 + %vecins = insertelement <2 x float> %vec, float %0, i32 0 + ret <2 x float> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr1) { +; P9LE-LABEL: s2v_test_f5: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfiwzx f0, 0, r5 +; P9LE-NEXT: xxspltw v2, v2, 2 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f5: +; P9BE: # %bb.0: # %entry +; P9BE: lfiwzx f0, 0, r5 +; P9BE: xxspltw v2, v2, 1 +; P9BE-NEXT: xxsldwi v3, f0, f0, 1 +; P9BE: vmrghw v2, v3, v2 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f5: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwzx f0, 0, r5 +; P8LE-NEXT: xxspltw v2, v2, 2 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f5: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfiwzx f0, 0, r5 +; P8BE-NEXT: xxspltw v2, v2, 1 +; P8BE-NEXT: xxsldwi v3, f0, f0, 1 +; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: blr +entry: + %0 = load float, float* %ptr1, align 8 + %vecins = insertelement <2 x float> %vec, float %0, i32 0 + ret <2 x float> %vecins +} + diff --git a/test/CodeGen/PowerPC/swaps-le-6.ll b/test/CodeGen/PowerPC/swaps-le-6.ll index 82c240e46d26..ac0bcc740681 100644 --- a/test/CodeGen/PowerPC/swaps-le-6.ll +++ b/test/CodeGen/PowerPC/swaps-le-6.ll @@ -1,12 +1,15 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr8 \ -; RUN: -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -O3 < %s | FileCheck %s ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \ -; RUN: -verify-machineinstrs < %s | 
FileCheck %s --check-prefix=CHECK-P9 \ +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefix=CHECK-P9 \ ; RUN: --implicit-check-not xxswapd ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \ -; RUN: -verify-machineinstrs -mattr=-power9-vector < %s | FileCheck %s +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \ +; RUN: -mattr=-power9-vector < %s | FileCheck %s ; These tests verify that VSX swap optimization works when loading a scalar ; into a vector register. @@ -17,6 +20,31 @@ @y = global double 1.780000e+00, align 8 define void @bar0() { +; CHECK-LABEL: bar0: +; CHECK: # %bb.0: # %entry +; CHECK: addis r3, r2, .LC0@toc@ha +; CHECK: addis r4, r2, .LC1@toc@ha +; CHECK: ld r3, .LC0@toc@l(r3) +; CHECK: addis r3, r2, .LC2@toc@ha +; CHECK: ld r3, .LC2@toc@l(r3) +; CHECK: xxpermdi vs0, vs0, vs1, 1 +; CHECK: stxvd2x vs0, 0, r3 +; CHECK: blr +; +; CHECK-P9-LABEL: bar0: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9: addis r3, r2, .LC0@toc@ha +; CHECK-P9: addis r4, r2, .LC1@toc@ha +; CHECK-P9: ld r3, .LC0@toc@l(r3) +; CHECK-P9: ld r4, .LC1@toc@l(r4) +; CHECK-P9: lfd f0, 0(r3) +; CHECK-P9: lxvx vs1, 0, r4 +; CHECK-P9: addis r3, r2, .LC2@toc@ha +; CHECK-P9: ld r3, .LC2@toc@l(r3) +; CHECK-P9: xxpermdi vs0, f0, f0, 2 +; CHECK-P9: xxpermdi vs0, vs1, vs0, 1 +; CHECK-P9: stxvx vs0, 0, r3 +; CHECK-P9: blr entry: %0 = load <2 x double>, <2 x double>* @x, align 16 %1 = load double, double* @y, align 8 @@ -25,21 +53,32 @@ entry: ret void } -; CHECK-LABEL: @bar0 -; CHECK-DAG: lxvd2x [[REG1:[0-9]+]] -; CHECK-DAG: lfdx [[REG2:[0-9]+]] -; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 -; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1 -; CHECK: stxvd2x [[REG5]] - -; CHECK-P9-LABEL: @bar0 -; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]] -; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3) -; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 -; CHECK-P9: xxpermdi [[REG5:[0-9]+]], [[REG1]], [[REG4]], 1 -; 
CHECK-P9: stxvx [[REG5]] - define void @bar1() { +; CHECK-LABEL: bar1: +; CHECK: # %bb.0: # %entry +; CHECK: addis r3, r2, .LC0@toc@ha +; CHECK: addis r4, r2, .LC1@toc@ha +; CHECK: ld r3, .LC0@toc@l(r3) +; CHECK: addis r3, r2, .LC2@toc@ha +; CHECK: ld r3, .LC2@toc@l(r3) +; CHECK: xxmrghd vs0, vs1, vs0 +; CHECK: stxvd2x vs0, 0, r3 +; CHECK: blr +; +; CHECK-P9-LABEL: bar1: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9: addis r3, r2, .LC0@toc@ha +; CHECK-P9: addis r4, r2, .LC1@toc@ha +; CHECK-P9: ld r3, .LC0@toc@l(r3) +; CHECK-P9: ld r4, .LC1@toc@l(r4) +; CHECK-P9: lfd f0, 0(r3) +; CHECK-P9: lxvx vs1, 0, r4 +; CHECK-P9: addis r3, r2, .LC2@toc@ha +; CHECK-P9: ld r3, .LC2@toc@l(r3) +; CHECK-P9: xxpermdi vs0, f0, f0, 2 +; CHECK-P9: xxmrgld vs0, vs0, vs1 +; CHECK-P9: stxvx vs0, 0, r3 +; CHECK-P9: blr entry: %0 = load <2 x double>, <2 x double>* @x, align 16 %1 = load double, double* @y, align 8 @@ -48,17 +87,3 @@ entry: ret void } -; CHECK-LABEL: @bar1 -; CHECK-DAG: lxvd2x [[REG1:[0-9]+]] -; CHECK-DAG: lfdx [[REG2:[0-9]+]] -; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 -; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]] -; CHECK: stxvd2x [[REG5]] - -; CHECK-P9-LABEL: @bar1 -; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]] -; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3) -; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 -; CHECK-P9: xxmrgld [[REG5:[0-9]+]], [[REG4]], [[REG1]] -; CHECK-P9: stxvx [[REG5]] - diff --git a/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/test/CodeGen/PowerPC/vsx_insert_extract_le.ll index df4cf357e2e0..ef7d8f350075 100644 --- a/test/CodeGen/PowerPC/vsx_insert_extract_le.ll +++ b/test/CodeGen/PowerPC/vsx_insert_extract_le.ll @@ -1,74 +1,125 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \ -; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \ +; RUN: | FileCheck %s -; RUN: llc 
-verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector \ -; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \ +; RUN: | FileCheck --check-prefix=CHECK-P9-VECTOR %s -; RUN: llc -verify-machineinstrs -mcpu=pwr9 \ +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \ ; RUN: --check-prefix=CHECK-P9 --implicit-check-not xxswapd define <2 x double> @testi0(<2 x double>* %p1, double* %p2) { +; CHECK-LABEL: testi0: +; CHECK: # %bb.0: +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: lfdx f1, 0, r4 +; CHECK-NEXT: xxswapd vs0, vs0 +; CHECK-NEXT: xxspltd vs1, vs1, 0 +; CHECK-NEXT: xxpermdi v2, vs0, vs1, 1 +; CHECK-NEXT: blr +; +; CHECK-P9-VECTOR-LABEL: testi0: +; CHECK-P9-VECTOR: # %bb.0: +; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P9-VECTOR-NEXT: lfdx f1, 0, r4 +; CHECK-P9-VECTOR-NEXT: xxspltd vs1, vs1, 0 +; CHECK-P9-VECTOR-NEXT: xxswapd vs0, vs0 +; CHECK-P9-VECTOR-NEXT: xxpermdi v2, vs0, vs1, 1 +; CHECK-P9-VECTOR-NEXT: blr +; +; CHECK-P9-LABEL: testi0: +; CHECK-P9: # %bb.0: +; CHECK-P9-NEXT: lfd f0, 0(r4) +; CHECK-P9-NEXT: lxv vs1, 0(r3) +; CHECK-P9-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-P9-NEXT: xxpermdi v2, vs1, vs0, 1 +; CHECK-P9-NEXT: blr %v = load <2 x double>, <2 x double>* %p1 %s = load double, double* %p2 %r = insertelement <2 x double> %v, double %s, i32 0 ret <2 x double> %r -; CHECK-LABEL: testi0 -; CHECK: lxvd2x 0, 0, 3 -; CHECK: lfdx 1, 0, 4 -; CHECK-DAG: xxspltd 1, 1, 0 -; CHECK-DAG: xxswapd 0, 0 -; CHECK: xxpermdi 34, 0, 1, 1 -; CHECK-P9-LABEL: testi0 -; CHECK-P9: lfd [[REG1:[0-9]+]], 0(4) -; CHECK-P9: lxv [[REG2:[0-9]+]], 0(3) -; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0 -; CHECK-P9: xxpermdi 34, [[REG2]], [[REG3]], 1 } define <2 x double> @testi1(<2 x double>* %p1, 
double* %p2) { +; CHECK-LABEL: testi1: +; CHECK: # %bb.0: +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: lfdx f1, 0, r4 +; CHECK-NEXT: xxswapd vs0, vs0 +; CHECK-NEXT: xxspltd vs1, vs1, 0 +; CHECK-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-NEXT: blr +; +; CHECK-P9-VECTOR-LABEL: testi1: +; CHECK-P9-VECTOR: # %bb.0: +; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P9-VECTOR-NEXT: lfdx f1, 0, r4 +; CHECK-P9-VECTOR-NEXT: xxspltd vs1, vs1, 0 +; CHECK-P9-VECTOR-NEXT: xxswapd vs0, vs0 +; CHECK-P9-VECTOR-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-P9-VECTOR-NEXT: blr +; +; CHECK-P9-LABEL: testi1: +; CHECK-P9: # %bb.0: +; CHECK-P9-NEXT: lfd f0, 0(r4) +; CHECK-P9-NEXT: lxv vs1, 0(r3) +; CHECK-P9-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-P9-NEXT: xxmrgld v2, vs0, vs1 +; CHECK-P9-NEXT: blr %v = load <2 x double>, <2 x double>* %p1 %s = load double, double* %p2 %r = insertelement <2 x double> %v, double %s, i32 1 ret <2 x double> %r -; CHECK-LABEL: testi1 -; CHECK: lxvd2x 0, 0, 3 -; CHECK: lfdx 1, 0, 4 -; CHECK-DAG: xxspltd 1, 1, 0 -; CHECK-DAG: xxswapd 0, 0 -; CHECK: xxmrgld 34, 1, 0 -; CHECK-P9-LABEL: testi1 -; CHECK-P9: lfd [[REG1:[0-9]+]], 0(4) -; CHECK-P9: lxv [[REG2:[0-9]+]], 0(3) -; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0 -; CHECK-P9: xxmrgld 34, [[REG3]], [[REG2]] } define double @teste0(<2 x double>* %p1) { +; CHECK-LABEL: teste0: +; CHECK: # %bb.0: +; CHECK-NEXT: lxvd2x vs1, 0, r3 +; CHECK: blr +; +; CHECK-P9-VECTOR-LABEL: teste0: +; CHECK-P9-VECTOR: # %bb.0: +; CHECK-P9-VECTOR-NEXT: lxvd2x vs1, 0, r3 +; CHECK-P9-VECTOR: blr +; +; CHECK-P9-LABEL: teste0: +; CHECK-P9: # %bb.0: +; CHECK-P9-NEXT: lfd f1, 0(r3) +; CHECK-P9-NEXT: blr %v = load <2 x double>, <2 x double>* %p1 %r = extractelement <2 x double> %v, i32 0 ret double %r -; CHECK-LABEL: teste0 -; CHECK: lxvd2x 1, 0, 3 -; CHECK-P9-LABEL: teste0 -; CHECK-P9: lfd 1, 0(3) } define double @teste1(<2 x double>* %p1) { +; CHECK-LABEL: teste1: +; CHECK: # %bb.0: +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: xxswapd vs1, vs0 +; 
CHECK: blr +; +; CHECK-P9-VECTOR-LABEL: teste1: +; CHECK-P9-VECTOR: # %bb.0: +; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P9-VECTOR-NEXT: xxswapd vs1, vs0 +; CHECK-P9-VECTOR: blr +; +; CHECK-P9-LABEL: teste1: +; CHECK-P9: # %bb.0: +; CHECK-P9-NEXT: lfd f1, 8(r3) +; CHECK-P9-NEXT: blr %v = load <2 x double>, <2 x double>* %p1 %r = extractelement <2 x double> %v, i32 1 ret double %r -; CHECK-LABEL: teste1 -; CHECK: lxvd2x 0, 0, 3 -; CHECK: xxswapd 1, 0 -; CHECK-P9-LABEL: teste1 -; CHECK-P9: lfd 1, 8(3) } diff --git a/test/CodeGen/X86/mingw-comdats.ll b/test/CodeGen/X86/mingw-comdats.ll index 2e9ebd8c9fc4..35f4fd12670c 100644 --- a/test/CodeGen/X86/mingw-comdats.ll +++ b/test/CodeGen/X86/mingw-comdats.ll @@ -1,13 +1,14 @@ -; RUN: llc -mtriple=x86_64-windows-itanium < %s | FileCheck %s -; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck %s -; RUN: llc -mtriple=x86_64-w64-windows-gnu < %s | FileCheck %s --check-prefix=GNU -; RUN: llc -mtriple=i686-w64-windows-gnu < %s | FileCheck %s --check-prefix=GNU32 -; RUN: llc -mtriple=x86_64-w64-windows-gnu < %s -filetype=obj | llvm-objdump - -headers | FileCheck %s --check-prefix=GNUOBJ +; RUN: llc -function-sections -mtriple=x86_64-windows-itanium < %s | FileCheck %s +; RUN: llc -function-sections -mtriple=x86_64-windows-msvc < %s | FileCheck %s +; RUN: llc -function-sections -mtriple=x86_64-w64-windows-gnu < %s | FileCheck %s --check-prefix=GNU +; RUN: llc -function-sections -mtriple=i686-w64-windows-gnu < %s | FileCheck %s --check-prefix=GNU32 +; RUN: llc -function-sections -mtriple=x86_64-w64-windows-gnu < %s -filetype=obj | llvm-objdump - -headers | FileCheck %s --check-prefix=GNUOBJ ; GCC and MSVC handle comdats completely differently. Make sure we do the right ; thing for each. 
-; Generated with this C++ source: +; Modeled on this C++ source, with additional modifications for +; -ffunction-sections: ; int bar(int); ; __declspec(selectany) int gv = 42; ; inline int foo(int x) { return bar(x) + gv; } @@ -26,8 +27,24 @@ entry: ret i32 %call } +; CHECK: .section .text,"xr",one_only,main ; CHECK: main: +; GNU: .section .text$main,"xr",one_only,main ; GNU: main: +; GNU32: .section .text$main,"xr",one_only,_main +; GNU32: _main: + +define dso_local x86_fastcallcc i32 @fastcall(i32 %x, i32 %y) { + %rv = add i32 %x, %y + ret i32 %rv +} + +; CHECK: .section .text,"xr",one_only,fastcall +; CHECK: fastcall: +; GNU: .section .text$fastcall,"xr",one_only,fastcall +; GNU: fastcall: +; GNU32: .section .text$fastcall,"xr",one_only,@fastcall@8 +; GNU32: @fastcall@8: ; Function Attrs: inlinehint uwtable define linkonce_odr dso_local i32 @_Z3fooi(i32 %x) #1 comdat { @@ -50,9 +67,9 @@ entry: ; GNU: gv: ; GNU: .long 42 -; GNU32: .section .text$__Z3fooi,"xr",discard,__Z3fooi +; GNU32: .section .text$_Z3fooi,"xr",discard,__Z3fooi ; GNU32: __Z3fooi: -; GNU32: .section .data$_gv,"dw",discard,_gv +; GNU32: .section .data$gv,"dw",discard,_gv ; GNU32: _gv: ; GNU32: .long 42 diff --git a/test/DebugInfo/Mips/eh_frame.ll b/test/DebugInfo/Mips/eh_frame.ll new file mode 100644 index 000000000000..4687443cb1cf --- /dev/null +++ b/test/DebugInfo/Mips/eh_frame.ll @@ -0,0 +1,38 @@ +; RUN: llc -mtriple mips-unknown-linux-gnu -mattr=+micromips -O3 -filetype=obj -o - %s | llvm-readelf -r | FileCheck %s + +; CHECK: .rel.eh_frame +; CHECK: DW.ref.__gxx_personality_v0 +; CHECK-NEXT: .text +; CHECK-NEXT: .gcc_except_table + +@_ZTIi = external constant i8* + +define dso_local i32 @main() local_unnamed_addr personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind + %0 = bitcast i8* %exception.i to i32* + store i32 5, i32* %0, align 16 + invoke void @__cxa_throw(i8* %exception.i, i8* bitcast 
(i8** @_ZTIi to i8*), i8* null) noreturn + to label %.noexc unwind label %return + +.noexc: + unreachable + +return: + %1 = landingpad { i8*, i32 } + catch i8* null + %2 = extractvalue { i8*, i32 } %1, 0 + %3 = tail call i8* @__cxa_begin_catch(i8* %2) nounwind + tail call void @__cxa_end_catch() + ret i32 0 +} + +declare i32 @__gxx_personality_v0(...) + +declare i8* @__cxa_begin_catch(i8*) local_unnamed_addr + +declare void @__cxa_end_catch() local_unnamed_addr + +declare i8* @__cxa_allocate_exception(i32) local_unnamed_addr + +declare void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr diff --git a/test/Transforms/LCSSA/rewrite-existing-dbg-values.ll b/test/Transforms/LCSSA/rewrite-existing-dbg-values.ll new file mode 100644 index 000000000000..231e716cb6d6 --- /dev/null +++ b/test/Transforms/LCSSA/rewrite-existing-dbg-values.ll @@ -0,0 +1,69 @@ +; RUN: opt -S -lcssa < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Reproducer for PR39019. +; +; Verify that the llvm.dbg.value in the %for.cond.cleanup2 block is rewritten +; to use the PHI node for %add that is created by LCSSA. 
+ +; CHECK-LABEL: for.cond.cleanup2: +; CHECK-NEXT: [[PN:%[^ ]*]] = phi i32 [ %add.lcssa, %for.cond.cleanup1 ] +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[PN]], metadata [[VAR:![0-9]+]], metadata !DIExpression()) +; CHECK-NEXT: call void @bar(i32 [[PN]]) + +; CHECK-LABEL: for.body: +; CHECK: %add = add nsw i32 0, 2 +; CHECK: call void @llvm.dbg.value(metadata i32 %add, metadata [[VAR]], metadata !DIExpression()) + +; CHECK: [[VAR]] = !DILocalVariable(name: "sum", + +; Function Attrs: nounwind +define void @foo() #0 !dbg !6 { +entry: + br label %for.cond.preheader, !dbg !12 + +for.cond.preheader: ; preds = %for.cond.cleanup1, %entry + br label %for.body, !dbg !12 + +for.cond.cleanup2: ; preds = %for.cond.cleanup1 + call void @llvm.dbg.value(metadata i32 %add, metadata !9, metadata !DIExpression()), !dbg !12 + tail call void @bar(i32 %add) #0, !dbg !12 + ret void, !dbg !12 + +for.cond.cleanup1: ; preds = %for.body + br i1 false, label %for.cond.preheader, label %for.cond.cleanup2, !dbg !12 + +for.body: ; preds = %for.body, %for.cond.preheader + %add = add nsw i32 0, 2, !dbg !12 + call void @llvm.dbg.value(metadata i32 %add, metadata !9, metadata !DIExpression()), !dbg !12 + br i1 false, label %for.body, label %for.cond.cleanup1, !dbg !12 +} + +; Function Attrs: nounwind +declare void @bar(i32) #0 + +; Function Attrs: nounwind readnone speculatable +declare void @llvm.dbg.value(metadata, metadata, metadata) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone speculatable } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !2) +!1 = !DIFile(filename: "foo.c", directory: "/") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{!"clang version 8.0.0"} +!6 = distinct !DISubprogram(name: 
"foo", scope: !1, file: !1, line: 10, type: !7, isLocal: false, isDefinition: true, scopeLine: 10, isOptimized: true, unit: !0, retainedNodes: !8) +!7 = !DISubroutineType(types: !2) +!8 = !{!9} +!9 = !DILocalVariable(name: "sum", scope: !10, file: !1, line: 11, type: !11) +!10 = !DILexicalBlockFile(scope: !6, file: !1, discriminator: 0) +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!12 = !DILocation(line: 0, scope: !10) diff --git a/tools/llvm-exegesis/lib/CMakeLists.txt b/tools/llvm-exegesis/lib/CMakeLists.txt index 175c2adf9de8..194304adf985 100644 --- a/tools/llvm-exegesis/lib/CMakeLists.txt +++ b/tools/llvm-exegesis/lib/CMakeLists.txt @@ -1,12 +1,16 @@ +set(TARGETS_TO_APPEND "") + if (LLVM_TARGETS_TO_BUILD MATCHES "X86") add_subdirectory(X86) - set(LLVM_EXEGESIS_TARGETS "${LLVM_EXEGESIS_TARGETS} X86" PARENT_SCOPE) + set(TARGETS_TO_APPEND "${TARGETS_TO_APPEND} X86") endif() if (LLVM_TARGETS_TO_BUILD MATCHES "AArch64") add_subdirectory(AArch64) - set(LLVM_EXEGESIS_TARGETS "${LLVM_EXEGESIS_TARGETS} AArch64" PARENT_SCOPE) + set(TARGETS_TO_APPEND "${TARGETS_TO_APPEND} AArch64") endif() +set(LLVM_EXEGESIS_TARGETS "${LLVM_EXEGESIS_TARGETS} ${TARGETS_TO_APPEND}" PARENT_SCOPE) + add_library(LLVMExegesis STATIC Analysis.cpp From 6ae2bfad8ae4459e286c88b4c5b5584c2577b317 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sat, 1 Dec 2018 15:41:40 +0000 Subject: [PATCH 2/6] Vendor import of clang release_70 branch r348011: https://llvm.org/svn/llvm-project/cfe/branches/release_70@348011 --- include/clang/Basic/AttrDocs.td | 2 +- .../clang/Basic/DiagnosticFrontendKinds.td | 2 +- include/clang/Basic/DiagnosticSemaKinds.td | 2 - lib/CodeGen/CodeGenModule.cpp | 2 - lib/CodeGen/CoverageMappingGen.cpp | 49 +++++++++-- lib/Driver/ToolChains/Arch/PPC.cpp | 10 ++- lib/Driver/ToolChains/Arch/PPC.h | 2 +- lib/Headers/altivec.h | 63 ++++++++------ lib/Sema/SemaInit.cpp | 5 +- lib/Serialization/ASTWriter.cpp | 5 +- test/CodeGen/builtins-ppc-altivec.c | 84 
+++++++++---------- test/CodeGen/builtins-ppc-quadword.c | 32 +++---- test/CodeGen/builtins-ppc-vsx.c | 36 ++++---- test/CoverageMapping/macros.c | 10 +++ test/Driver/openbsd.c | 5 ++ test/Frontend/warning-stdlibcxx-darwin.cpp | 2 +- test/Sema/attr-ifunc.c | 4 - test/SemaCXX/sourceranges.cpp | 7 ++ test/SemaOpenCL/extension-begin.cl | 56 +++++-------- test/SemaOpenCL/extension-begin.h | 26 ++++++ tools/scan-build/bin/scan-build | 4 +- 21 files changed, 247 insertions(+), 161 deletions(-) create mode 100644 test/SemaOpenCL/extension-begin.h diff --git a/include/clang/Basic/AttrDocs.td b/include/clang/Basic/AttrDocs.td index bb2993eab4bc..5a937b15d32e 100644 --- a/include/clang/Basic/AttrDocs.td +++ b/include/clang/Basic/AttrDocs.td @@ -3364,7 +3364,7 @@ def IFuncDocs : Documentation { let Content = [{ ``__attribute__((ifunc("resolver")))`` is used to mark that the address of a declaration should be resolved at runtime by calling a resolver function. -The symbol name of the resolver function is given in quotes. A function with this name (after mangling) must be defined in the current translation unit; it may be ``static``. The resolver function should take no arguments and return a pointer. +The symbol name of the resolver function is given in quotes. A function with this name (after mangling) must be defined in the current translation unit; it may be ``static``. The resolver function should return a pointer. The ``ifunc`` attribute may only be used on a function declaration. A function declaration with an ``ifunc`` attribute is considered to be a definition of the declared entity. The entity must not have weak linkage; for example, in C++, it cannot be applied to a declaration if a definition at that location would be considered inline. 
diff --git a/include/clang/Basic/DiagnosticFrontendKinds.td b/include/clang/Basic/DiagnosticFrontendKinds.td index 66287933b7ca..2bf85e0098d5 100644 --- a/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/include/clang/Basic/DiagnosticFrontendKinds.td @@ -238,7 +238,7 @@ def warn_option_invalid_ocl_version : Warning< "OpenCL version %0 does not support the option '%1'">, InGroup; def warn_stdlibcxx_not_found : Warning< - "include path for stdlibc++ headers not found; pass '-std=libc++' on the " + "include path for stdlibc++ headers not found; pass '-stdlib=libc++' on the " "command line to use the libc++ standard library instead">, InGroup>; } diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td index dc192aafe38c..b2418569152d 100644 --- a/include/clang/Basic/DiagnosticSemaKinds.td +++ b/include/clang/Basic/DiagnosticSemaKinds.td @@ -2857,8 +2857,6 @@ def err_cyclic_alias : Error< "%select{alias|ifunc}0 definition is part of a cycle">; def err_ifunc_resolver_return : Error< "ifunc resolver function must return a pointer">; -def err_ifunc_resolver_params : Error< - "ifunc resolver function must have no parameters">; def warn_attribute_wrong_decl_type_str : Warning< "%0 attribute only applies to %1">, InGroup; def err_attribute_wrong_decl_type_str : Error< diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 155ee6c6af12..76112e191c71 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -320,8 +320,6 @@ void CodeGenModule::checkAliases() { assert(FTy); if (!FTy->getReturnType()->isPointerTy()) Diags.Report(Location, diag::err_ifunc_resolver_return); - if (FTy->getNumParams()) - Diags.Report(Location, diag::err_ifunc_resolver_params); } llvm::Constant *Aliasee = Alias->getIndirectSymbol(); diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp index 389d29e467b7..45a260b41d41 100644 --- a/lib/CodeGen/CoverageMappingGen.cpp +++ 
b/lib/CodeGen/CoverageMappingGen.cpp @@ -553,6 +553,15 @@ struct CounterCoverageMappingBuilder completeDeferred(Count, DeferredEndLoc); } + size_t locationDepth(SourceLocation Loc) { + size_t Depth = 0; + while (Loc.isValid()) { + Loc = getIncludeOrExpansionLoc(Loc); + Depth++; + } + return Depth; + } + /// Pop regions from the stack into the function's list of regions. /// /// Adds all regions from \c ParentIndex to the top of the stack to the @@ -567,19 +576,41 @@ struct CounterCoverageMappingBuilder SourceLocation EndLoc = Region.hasEndLoc() ? Region.getEndLoc() : RegionStack[ParentIndex].getEndLoc(); + size_t StartDepth = locationDepth(StartLoc); + size_t EndDepth = locationDepth(EndLoc); while (!SM.isWrittenInSameFile(StartLoc, EndLoc)) { - // The region ends in a nested file or macro expansion. Create a - // separate region for each expansion. - SourceLocation NestedLoc = getStartOfFileOrMacro(EndLoc); - assert(SM.isWrittenInSameFile(NestedLoc, EndLoc)); + bool UnnestStart = StartDepth >= EndDepth; + bool UnnestEnd = EndDepth >= StartDepth; + if (UnnestEnd) { + // The region ends in a nested file or macro expansion. Create a + // separate region for each expansion. + SourceLocation NestedLoc = getStartOfFileOrMacro(EndLoc); + assert(SM.isWrittenInSameFile(NestedLoc, EndLoc)); - if (!isRegionAlreadyAdded(NestedLoc, EndLoc)) - SourceRegions.emplace_back(Region.getCounter(), NestedLoc, EndLoc); + if (!isRegionAlreadyAdded(NestedLoc, EndLoc)) + SourceRegions.emplace_back(Region.getCounter(), NestedLoc, EndLoc); - EndLoc = getPreciseTokenLocEnd(getIncludeOrExpansionLoc(EndLoc)); - if (EndLoc.isInvalid()) - llvm::report_fatal_error("File exit not handled before popRegions"); + EndLoc = getPreciseTokenLocEnd(getIncludeOrExpansionLoc(EndLoc)); + if (EndLoc.isInvalid()) + llvm::report_fatal_error("File exit not handled before popRegions"); + EndDepth--; + } + if (UnnestStart) { + // The region begins in a nested file or macro expansion. 
Create a + // separate region for each expansion. + SourceLocation NestedLoc = getEndOfFileOrMacro(StartLoc); + assert(SM.isWrittenInSameFile(StartLoc, NestedLoc)); + + if (!isRegionAlreadyAdded(StartLoc, NestedLoc)) + SourceRegions.emplace_back(Region.getCounter(), StartLoc, NestedLoc); + + StartLoc = getIncludeOrExpansionLoc(StartLoc); + if (StartLoc.isInvalid()) + llvm::report_fatal_error("File exit not handled before popRegions"); + StartDepth--; + } } + Region.setStartLoc(StartLoc); Region.setEndLoc(EndLoc); MostRecentLocation = EndLoc; diff --git a/lib/Driver/ToolChains/Arch/PPC.cpp b/lib/Driver/ToolChains/Arch/PPC.cpp index f6a95962ace3..791f1206cf25 100644 --- a/lib/Driver/ToolChains/Arch/PPC.cpp +++ b/lib/Driver/ToolChains/Arch/PPC.cpp @@ -107,15 +107,19 @@ void ppc::getPPCTargetFeatures(const Driver &D, const llvm::Triple &Triple, if (FloatABI == ppc::FloatABI::Soft) Features.push_back("-hard-float"); - ppc::ReadGOTPtrMode ReadGOT = ppc::getPPCReadGOTPtrMode(D, Args); + ppc::ReadGOTPtrMode ReadGOT = ppc::getPPCReadGOTPtrMode(D, Triple, Args); if (ReadGOT == ppc::ReadGOTPtrMode::SecurePlt) Features.push_back("+secure-plt"); } -ppc::ReadGOTPtrMode ppc::getPPCReadGOTPtrMode(const Driver &D, const ArgList &Args) { +ppc::ReadGOTPtrMode ppc::getPPCReadGOTPtrMode(const Driver &D, const llvm::Triple &Triple, + const ArgList &Args) { if (Args.getLastArg(options::OPT_msecure_plt)) return ppc::ReadGOTPtrMode::SecurePlt; - return ppc::ReadGOTPtrMode::Bss; + if (Triple.isOSOpenBSD()) + return ppc::ReadGOTPtrMode::SecurePlt; + else + return ppc::ReadGOTPtrMode::Bss; } ppc::FloatABI ppc::getPPCFloatABI(const Driver &D, const ArgList &Args) { diff --git a/lib/Driver/ToolChains/Arch/PPC.h b/lib/Driver/ToolChains/Arch/PPC.h index 3acee91a2ac3..4f3cd688ca39 100644 --- a/lib/Driver/ToolChains/Arch/PPC.h +++ b/lib/Driver/ToolChains/Arch/PPC.h @@ -38,7 +38,7 @@ FloatABI getPPCFloatABI(const Driver &D, const llvm::opt::ArgList &Args); std::string getPPCTargetCPU(const 
llvm::opt::ArgList &Args); const char *getPPCAsmModeForCPU(StringRef Name); -ReadGOTPtrMode getPPCReadGOTPtrMode(const Driver &D, +ReadGOTPtrMode getPPCReadGOTPtrMode(const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args); void getPPCTargetFeatures(const Driver &D, const llvm::Triple &Triple, diff --git a/lib/Headers/altivec.h b/lib/Headers/altivec.h index 90fd477d9b98..eaa56cb47b8d 100644 --- a/lib/Headers/altivec.h +++ b/lib/Headers/altivec.h @@ -16353,67 +16353,82 @@ vec_revb(vector unsigned __int128 __a) { /* vec_xl */ +typedef vector signed char unaligned_vec_schar __attribute__((aligned(1))); +typedef vector unsigned char unaligned_vec_uchar __attribute__((aligned(1))); +typedef vector signed short unaligned_vec_sshort __attribute__((aligned(1))); +typedef vector unsigned short unaligned_vec_ushort __attribute__((aligned(1))); +typedef vector signed int unaligned_vec_sint __attribute__((aligned(1))); +typedef vector unsigned int unaligned_vec_uint __attribute__((aligned(1))); +typedef vector float unaligned_vec_float __attribute__((aligned(1))); + static inline __ATTRS_o_ai vector signed char vec_xl(signed long long __offset, signed char *__ptr) { - return *(vector signed char *)(__ptr + __offset); + return *(unaligned_vec_schar *)(__ptr + __offset); } static inline __ATTRS_o_ai vector unsigned char vec_xl(signed long long __offset, unsigned char *__ptr) { - return *(vector unsigned char *)(__ptr + __offset); + return *(unaligned_vec_uchar*)(__ptr + __offset); } static inline __ATTRS_o_ai vector signed short vec_xl(signed long long __offset, signed short *__ptr) { - return *(vector signed short *)(__ptr + __offset); + return *(unaligned_vec_sshort *)(__ptr + __offset); } static inline __ATTRS_o_ai vector unsigned short vec_xl(signed long long __offset, unsigned short *__ptr) { - return *(vector unsigned short *)(__ptr + __offset); + return *(unaligned_vec_ushort *)(__ptr + __offset); } static inline __ATTRS_o_ai vector signed int 
vec_xl(signed long long __offset, signed int *__ptr) { - return *(vector signed int *)(__ptr + __offset); + return *(unaligned_vec_sint *)(__ptr + __offset); } static inline __ATTRS_o_ai vector unsigned int vec_xl(signed long long __offset, unsigned int *__ptr) { - return *(vector unsigned int *)(__ptr + __offset); + return *(unaligned_vec_uint *)(__ptr + __offset); } static inline __ATTRS_o_ai vector float vec_xl(signed long long __offset, float *__ptr) { - return *(vector float *)(__ptr + __offset); + return *(unaligned_vec_float *)(__ptr + __offset); } #ifdef __VSX__ +typedef vector signed long long unaligned_vec_sll __attribute__((aligned(1))); +typedef vector unsigned long long unaligned_vec_ull __attribute__((aligned(1))); +typedef vector double unaligned_vec_double __attribute__((aligned(1))); + static inline __ATTRS_o_ai vector signed long long vec_xl(signed long long __offset, signed long long *__ptr) { - return *(vector signed long long *)(__ptr + __offset); + return *(unaligned_vec_sll *)(__ptr + __offset); } static inline __ATTRS_o_ai vector unsigned long long vec_xl(signed long long __offset, unsigned long long *__ptr) { - return *(vector unsigned long long *)(__ptr + __offset); + return *(unaligned_vec_ull *)(__ptr + __offset); } static inline __ATTRS_o_ai vector double vec_xl(signed long long __offset, double *__ptr) { - return *(vector double *)(__ptr + __offset); + return *(unaligned_vec_double *)(__ptr + __offset); } #endif #if defined(__POWER8_VECTOR__) && defined(__powerpc64__) +typedef vector signed __int128 unaligned_vec_si128 __attribute__((aligned(1))); +typedef vector unsigned __int128 unaligned_vec_ui128 + __attribute__((aligned(1))); static inline __ATTRS_o_ai vector signed __int128 vec_xl(signed long long __offset, signed __int128 *__ptr) { - return *(vector signed __int128 *)(__ptr + __offset); + return *(unaligned_vec_si128 *)(__ptr + __offset); } static inline __ATTRS_o_ai vector unsigned __int128 vec_xl(signed long long __offset, 
unsigned __int128 *__ptr) { - return *(vector unsigned __int128 *)(__ptr + __offset); + return *(unaligned_vec_ui128 *)(__ptr + __offset); } #endif @@ -16498,62 +16513,62 @@ vec_xl_be(signed long long __offset, unsigned __int128 *__ptr) { static inline __ATTRS_o_ai void vec_xst(vector signed char __vec, signed long long __offset, signed char *__ptr) { - *(vector signed char *)(__ptr + __offset) = __vec; + *(unaligned_vec_schar *)(__ptr + __offset) = __vec; } static inline __ATTRS_o_ai void vec_xst(vector unsigned char __vec, signed long long __offset, unsigned char *__ptr) { - *(vector unsigned char *)(__ptr + __offset) = __vec; + *(unaligned_vec_uchar *)(__ptr + __offset) = __vec; } static inline __ATTRS_o_ai void vec_xst(vector signed short __vec, signed long long __offset, signed short *__ptr) { - *(vector signed short *)(__ptr + __offset) = __vec; + *(unaligned_vec_sshort *)(__ptr + __offset) = __vec; } static inline __ATTRS_o_ai void vec_xst(vector unsigned short __vec, signed long long __offset, unsigned short *__ptr) { - *(vector unsigned short *)(__ptr + __offset) = __vec; + *(unaligned_vec_ushort *)(__ptr + __offset) = __vec; } static inline __ATTRS_o_ai void vec_xst(vector signed int __vec, signed long long __offset, signed int *__ptr) { - *(vector signed int *)(__ptr + __offset) = __vec; + *(unaligned_vec_sint *)(__ptr + __offset) = __vec; } static inline __ATTRS_o_ai void vec_xst(vector unsigned int __vec, signed long long __offset, unsigned int *__ptr) { - *(vector unsigned int *)(__ptr + __offset) = __vec; + *(unaligned_vec_uint *)(__ptr + __offset) = __vec; } static inline __ATTRS_o_ai void vec_xst(vector float __vec, signed long long __offset, float *__ptr) { - *(vector float *)(__ptr + __offset) = __vec; + *(unaligned_vec_float *)(__ptr + __offset) = __vec; } #ifdef __VSX__ static inline __ATTRS_o_ai void vec_xst(vector signed long long __vec, signed long long __offset, signed long long *__ptr) { - *(vector signed long long *)(__ptr + __offset) = 
__vec; + *(unaligned_vec_sll *)(__ptr + __offset) = __vec; } static inline __ATTRS_o_ai void vec_xst(vector unsigned long long __vec, signed long long __offset, unsigned long long *__ptr) { - *(vector unsigned long long *)(__ptr + __offset) = __vec; + *(unaligned_vec_ull *)(__ptr + __offset) = __vec; } static inline __ATTRS_o_ai void vec_xst(vector double __vec, signed long long __offset, double *__ptr) { - *(vector double *)(__ptr + __offset) = __vec; + *(unaligned_vec_double *)(__ptr + __offset) = __vec; } #endif @@ -16561,13 +16576,13 @@ static inline __ATTRS_o_ai void vec_xst(vector double __vec, static inline __ATTRS_o_ai void vec_xst(vector signed __int128 __vec, signed long long __offset, signed __int128 *__ptr) { - *(vector signed __int128 *)(__ptr + __offset) = __vec; + *(unaligned_vec_si128 *)(__ptr + __offset) = __vec; } static inline __ATTRS_o_ai void vec_xst(vector unsigned __int128 __vec, signed long long __offset, unsigned __int128 *__ptr) { - *(vector unsigned __int128 *)(__ptr + __offset) = __vec; + *(unaligned_vec_ui128 *)(__ptr + __offset) = __vec; } #endif diff --git a/lib/Sema/SemaInit.cpp b/lib/Sema/SemaInit.cpp index 5070996d50e0..4f9239cb4258 100644 --- a/lib/Sema/SemaInit.cpp +++ b/lib/Sema/SemaInit.cpp @@ -6092,7 +6092,10 @@ PerformConstructorInitialization(Sema &S, TypeSourceInfo *TSInfo = Entity.getTypeSourceInfo(); if (!TSInfo) TSInfo = S.Context.getTrivialTypeSourceInfo(Entity.getType(), Loc); - SourceRange ParenOrBraceRange = Kind.getParenOrBraceRange(); + SourceRange ParenOrBraceRange = + (Kind.getKind() == InitializationKind::IK_DirectList) + ? 
SourceRange(LBraceLoc, RBraceLoc) + : Kind.getParenOrBraceRange(); if (auto *Shadow = dyn_cast<ConstructorUsingShadowDecl>( Step.Function.FoundDecl.getDecl())) { diff --git a/lib/Serialization/ASTWriter.cpp b/lib/Serialization/ASTWriter.cpp index 1a8d806e9d24..f74269ee348f 100644 --- a/lib/Serialization/ASTWriter.cpp +++ b/lib/Serialization/ASTWriter.cpp @@ -5022,13 +5022,16 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, WriteFPPragmaOptions(SemaRef.getFPOptions()); WriteOpenCLExtensions(SemaRef); WriteOpenCLExtensionTypes(SemaRef); - WriteOpenCLExtensionDecls(SemaRef); WriteCUDAPragmas(SemaRef); // If we're emitting a module, write out the submodule information. if (WritingModule) WriteSubmodules(WritingModule); + // We need to have information about submodules to correctly deserialize + // decls from OpenCLExtensionDecls block + WriteOpenCLExtensionDecls(SemaRef); + Stream.EmitRecord(SPECIAL_TYPES, SpecialTypes); // Write the record containing external, unnamed definitions. diff --git a/test/CodeGen/builtins-ppc-altivec.c b/test/CodeGen/builtins-ppc-altivec.c index 99cf3c253879..8c22de4e1efa 100644 --- a/test/CodeGen/builtins-ppc-altivec.c +++ b/test/CodeGen/builtins-ppc-altivec.c @@ -9338,32 +9338,32 @@ void test9() { // CHECK-LABEL: define void @test9 // CHECK-LE-LABEL: define void @test9 res_vsc = vec_xl(param_sll, &param_sc); - // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 16 - // CHECK-LE: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 16 + // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 + // CHECK-LE: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 res_vuc = vec_xl(param_sll, &param_uc); - // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 16 - // CHECK-LE: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 16 + // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 + // CHECK-LE: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 res_vs = vec_xl(param_sll, &param_s); - // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 16 - // CHECK-LE: 
load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 16 + // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 + // CHECK-LE: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 res_vus = vec_xl(param_sll, &param_us); - // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 16 - // CHECK-LE: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 16 + // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 + // CHECK-LE: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 res_vi = vec_xl(param_sll, &param_i); - // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 16 - // CHECK-LE: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 16 + // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 + // CHECK-LE: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 res_vui = vec_xl(param_sll, &param_ui); - // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 16 - // CHECK-LE: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 16 + // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 + // CHECK-LE: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 res_vf = vec_xl(param_sll, &param_f); - // CHECK: load <4 x float>, <4 x float>* %{{[0-9]+}}, align 16 - // CHECK-LE: load <4 x float>, <4 x float>* %{{[0-9]+}}, align 16 + // CHECK: load <4 x float>, <4 x float>* %{{[0-9]+}}, align 1 + // CHECK-LE: load <4 x float>, <4 x float>* %{{[0-9]+}}, align 1 } /* ------------------------------ vec_xst ----------------------------------- */ @@ -9371,32 +9371,32 @@ void test10() { // CHECK-LABEL: define void @test10 // CHECK-LE-LABEL: define void @test10 vec_xst(vsc, param_sll, &param_sc); - // CHECK: store <16 x i8> %{{[0-9]+}}, <16 x i8>* %{{[0-9]+}}, align 16 - // CHECK-LE: store <16 x i8> %{{[0-9]+}}, <16 x i8>* %{{[0-9]+}}, align 16 + // CHECK: store <16 x i8> %{{[0-9]+}}, <16 x i8>* %{{[0-9]+}}, align 1 + // CHECK-LE: store <16 x i8> %{{[0-9]+}}, <16 x i8>* %{{[0-9]+}}, align 1 vec_xst(vuc, param_sll, &param_uc); - // CHECK: store <16 x i8> %{{[0-9]+}}, <16 x i8>* %{{[0-9]+}}, align 16 - // CHECK-LE: store <16 x i8> %{{[0-9]+}}, 
<16 x i8>* %{{[0-9]+}}, align 16 + // CHECK: store <16 x i8> %{{[0-9]+}}, <16 x i8>* %{{[0-9]+}}, align 1 + // CHECK-LE: store <16 x i8> %{{[0-9]+}}, <16 x i8>* %{{[0-9]+}}, align 1 vec_xst(vs, param_sll, &param_s); - // CHECK: store <8 x i16> %{{[0-9]+}}, <8 x i16>* %{{[0-9]+}}, align 16 - // CHECK-LE: store <8 x i16> %{{[0-9]+}}, <8 x i16>* %{{[0-9]+}}, align 16 + // CHECK: store <8 x i16> %{{[0-9]+}}, <8 x i16>* %{{[0-9]+}}, align 1 + // CHECK-LE: store <8 x i16> %{{[0-9]+}}, <8 x i16>* %{{[0-9]+}}, align 1 vec_xst(vus, param_sll, &param_us); - // CHECK: store <8 x i16> %{{[0-9]+}}, <8 x i16>* %{{[0-9]+}}, align 16 - // CHECK-LE: store <8 x i16> %{{[0-9]+}}, <8 x i16>* %{{[0-9]+}}, align 16 + // CHECK: store <8 x i16> %{{[0-9]+}}, <8 x i16>* %{{[0-9]+}}, align 1 + // CHECK-LE: store <8 x i16> %{{[0-9]+}}, <8 x i16>* %{{[0-9]+}}, align 1 vec_xst(vi, param_sll, &param_i); - // CHECK: store <4 x i32> %{{[0-9]+}}, <4 x i32>* %{{[0-9]+}}, align 16 - // CHECK-LE: store <4 x i32> %{{[0-9]+}}, <4 x i32>* %{{[0-9]+}}, align 16 + // CHECK: store <4 x i32> %{{[0-9]+}}, <4 x i32>* %{{[0-9]+}}, align 1 + // CHECK-LE: store <4 x i32> %{{[0-9]+}}, <4 x i32>* %{{[0-9]+}}, align 1 vec_xst(vui, param_sll, &param_ui); - // CHECK: store <4 x i32> %{{[0-9]+}}, <4 x i32>* %{{[0-9]+}}, align 16 - // CHECK-LE: store <4 x i32> %{{[0-9]+}}, <4 x i32>* %{{[0-9]+}}, align 16 + // CHECK: store <4 x i32> %{{[0-9]+}}, <4 x i32>* %{{[0-9]+}}, align 1 + // CHECK-LE: store <4 x i32> %{{[0-9]+}}, <4 x i32>* %{{[0-9]+}}, align 1 vec_xst(vf, param_sll, &param_f); - // CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 16 - // CHECK-LE: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 16 + // CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 1 + // CHECK-LE: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 1 } /* ----------------------------- vec_xl_be ---------------------------------- */ @@ -9404,35 +9404,35 @@ void test11() { // CHECK-LABEL: define 
void @test11 // CHECK-LE-LABEL: define void @test11 res_vsc = vec_xl_be(param_sll, &param_sc); - // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 16 + // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) // CHECK-LE: shufflevector <16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i32> res_vuc = vec_xl_be(param_sll, &param_uc); - // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 16 + // CHECK: load <16 x i8>, <16 x i8>* %{{[0-9]+}}, align 1 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) // CHECK-LE: shufflevector <16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i32> res_vs = vec_xl_be(param_sll, &param_s); - // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 16 + // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) // CHECK-LE: shufflevector <8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i32> res_vus = vec_xl_be(param_sll, &param_us); - // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 16 + // CHECK: load <8 x i16>, <8 x i16>* %{{[0-9]+}}, align 1 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) // CHECK-LE: shufflevector <8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i32> res_vi = vec_xl_be(param_sll, &param_i); - // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 16 + // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(i8* %{{[0-9]+}}) res_vui = vec_xl_be(param_sll, &param_ui); - // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 16 + // CHECK: load <4 x i32>, <4 x i32>* %{{[0-9]+}}, align 1 // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(i8* %{{[0-9]+}}) res_vf = vec_xl_be(param_sll, &param_f); - // CHECK: load <4 x float>, <4 x float>* %{{[0-9]+}}, align 16 + // CHECK: load <4 x float>, <4 x float>* %{{[0-9]+}}, align 1 // CHECK-LE: call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(i8* 
%{{[0-9]+}}) } @@ -9441,34 +9441,34 @@ void test12() { // CHECK-LABEL: define void @test12 // CHECK-LE-LABEL: define void @test12 vec_xst_be(vsc, param_sll, &param_sc); - // CHECK: store <16 x i8> %{{[0-9]+}}, <16 x i8>* %{{[0-9]+}}, align 16 + // CHECK: store <16 x i8> %{{[0-9]+}}, <16 x i8>* %{{[0-9]+}}, align 1 // CHECK-LE: shufflevector <16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i32> // CHECK-LE: call void @llvm.ppc.vsx.stxvd2x.be(<2 x double> %{{[0-9]+}}, i8* %{{[0-9]+}}) vec_xst_be(vuc, param_sll, &param_uc); - // CHECK: store <16 x i8> %{{[0-9]+}}, <16 x i8>* %{{[0-9]+}}, align 16 + // CHECK: store <16 x i8> %{{[0-9]+}}, <16 x i8>* %{{[0-9]+}}, align 1 // CHECK-LE: shufflevector <16 x i8> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i32> // CHECK-LE: call void @llvm.ppc.vsx.stxvd2x.be(<2 x double> %{{[0-9]+}}, i8* %{{[0-9]+}}) vec_xst_be(vs, param_sll, &param_s); - // CHECK: store <8 x i16> %{{[0-9]+}}, <8 x i16>* %{{[0-9]+}}, align 16 + // CHECK: store <8 x i16> %{{[0-9]+}}, <8 x i16>* %{{[0-9]+}}, align 1 // CHECK-LE: shufflevector <8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i32> // CHECK-LE: call void @llvm.ppc.vsx.stxvd2x.be(<2 x double> %{{[0-9]+}}, i8* %{{[0-9]+}}) vec_xst_be(vus, param_sll, &param_us); - // CHECK: store <8 x i16> %{{[0-9]+}}, <8 x i16>* %{{[0-9]+}}, align 16 + // CHECK: store <8 x i16> %{{[0-9]+}}, <8 x i16>* %{{[0-9]+}}, align 1 // CHECK-LE: shufflevector <8 x i16> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i32> // CHECK-LE: call void @llvm.ppc.vsx.stxvd2x.be(<2 x double> %{{[0-9]+}}, i8* %{{[0-9]+}}) vec_xst_be(vi, param_sll, &param_i); - // CHECK: store <4 x i32> %{{[0-9]+}}, <4 x i32>* %{{[0-9]+}}, align 16 + // CHECK: store <4 x i32> %{{[0-9]+}}, <4 x i32>* %{{[0-9]+}}, align 1 // CHECK-LE: call void @llvm.ppc.vsx.stxvw4x.be(<4 x i32> %{{[0-9]+}}, i8* %{{[0-9]+}}) vec_xst_be(vui, param_sll, &param_ui); - // CHECK: store <4 x i32> %{{[0-9]+}}, <4 x i32>* %{{[0-9]+}}, align 16 + // CHECK: store <4 x i32> %{{[0-9]+}}, <4 x i32>* %{{[0-9]+}}, 
align 1 // CHECK-LE: call void @llvm.ppc.vsx.stxvw4x.be(<4 x i32> %{{[0-9]+}}, i8* %{{[0-9]+}}) vec_xst_be(vf, param_sll, &param_f); - // CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 16 + // CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 1 // CHECK-LE: call void @llvm.ppc.vsx.stxvw4x.be(<4 x i32> %{{[0-9]+}}, i8* %{{[0-9]+}}) } diff --git a/test/CodeGen/builtins-ppc-quadword.c b/test/CodeGen/builtins-ppc-quadword.c index 7d014db61323..868fb183a623 100644 --- a/test/CodeGen/builtins-ppc-quadword.c +++ b/test/CodeGen/builtins-ppc-quadword.c @@ -205,45 +205,45 @@ void test1() { /* vec_xl */ res_vlll = vec_xl(param_sll, &param_lll); - // CHECK: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 16 - // CHECK-LE: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 16 + // CHECK: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 1 + // CHECK-LE: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 1 // CHECK-PPC: error: call to 'vec_xl' is ambiguous res_vulll = vec_xl(param_sll, &param_ulll); - // CHECK: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 16 - // CHECK-LE: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 16 + // CHECK: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 1 + // CHECK-LE: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 1 // CHECK-PPC: error: call to 'vec_xl' is ambiguous /* vec_xst */ vec_xst(vlll, param_sll, &param_lll); - // CHECK: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 16 - // CHECK-LE: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 16 + // CHECK: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 1 + // CHECK-LE: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 1 // CHECK-PPC: error: call to 'vec_xst' is ambiguous vec_xst(vulll, param_sll, &param_ulll); - // CHECK: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 16 - // CHECK-LE: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 16 + // CHECK: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, 
align 1 + // CHECK-LE: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 1 // CHECK-PPC: error: call to 'vec_xst' is ambiguous /* vec_xl_be */ res_vlll = vec_xl_be(param_sll, &param_lll); - // CHECK: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 16 - // CHECK-LE: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 16 + // CHECK: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 1 + // CHECK-LE: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 1 // CHECK-PPC: error: call to 'vec_xl' is ambiguous res_vulll = vec_xl_be(param_sll, &param_ulll); - // CHECK: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 16 - // CHECK-LE: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 16 + // CHECK: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 1 + // CHECK-LE: load <1 x i128>, <1 x i128>* %{{[0-9]+}}, align 1 // CHECK-PPC: error: call to 'vec_xl' is ambiguous /* vec_xst_be */ vec_xst_be(vlll, param_sll, &param_lll); - // CHECK: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 16 - // CHECK-LE: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 16 + // CHECK: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 1 + // CHECK-LE: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 1 // CHECK-PPC: error: call to 'vec_xst' is ambiguous vec_xst_be(vulll, param_sll, &param_ulll); - // CHECK: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 16 - // CHECK-LE: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 16 + // CHECK: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 1 + // CHECK-LE: store <1 x i128> %{{[0-9]+}}, <1 x i128>* %{{[0-9]+}}, align 1 // CHECK-PPC: error: call to 'vec_xst' is ambiguous } diff --git a/test/CodeGen/builtins-ppc-vsx.c b/test/CodeGen/builtins-ppc-vsx.c index 848d24d4fb10..29b7149e1e73 100644 --- a/test/CodeGen/builtins-ppc-vsx.c +++ b/test/CodeGen/builtins-ppc-vsx.c @@ -1637,51 +1637,51 @@ res_vsll = vec_slo(vsll, vsc); // CHECK-LE: @llvm.ppc.altivec.vsro res_vsll = vec_xl(sll, asll); -// CHECK: load 
<2 x i64>, <2 x i64>* %{{[0-9]+}}, align 16 -// CHECK-LE: load <2 x i64>, <2 x i64>* %{{[0-9]+}}, align 16 +// CHECK: load <2 x i64>, <2 x i64>* %{{[0-9]+}}, align 1 +// CHECK-LE: load <2 x i64>, <2 x i64>* %{{[0-9]+}}, align 1 res_vull = vec_xl(sll, aull); -// CHECK: load <2 x i64>, <2 x i64>* %{{[0-9]+}}, align 16 -// CHECK-LE: load <2 x i64>, <2 x i64>* %{{[0-9]+}}, align 16 +// CHECK: load <2 x i64>, <2 x i64>* %{{[0-9]+}}, align 1 +// CHECK-LE: load <2 x i64>, <2 x i64>* %{{[0-9]+}}, align 1 res_vd = vec_xl(sll, ad); -// CHECK: load <2 x double>, <2 x double>* %{{[0-9]+}}, align 16 -// CHECK-LE: load <2 x double>, <2 x double>* %{{[0-9]+}}, align 16 +// CHECK: load <2 x double>, <2 x double>* %{{[0-9]+}}, align 1 +// CHECK-LE: load <2 x double>, <2 x double>* %{{[0-9]+}}, align 1 vec_xst(vsll, sll, asll); -// CHECK: store <2 x i64> %{{[0-9]+}}, <2 x i64>* %{{[0-9]+}}, align 16 -// CHECK-LE: store <2 x i64> %{{[0-9]+}}, <2 x i64>* %{{[0-9]+}}, align 16 +// CHECK: store <2 x i64> %{{[0-9]+}}, <2 x i64>* %{{[0-9]+}}, align 1 +// CHECK-LE: store <2 x i64> %{{[0-9]+}}, <2 x i64>* %{{[0-9]+}}, align 1 vec_xst(vull, sll, aull); -// CHECK: store <2 x i64> %{{[0-9]+}}, <2 x i64>* %{{[0-9]+}}, align 16 -// CHECK-LE: store <2 x i64> %{{[0-9]+}}, <2 x i64>* %{{[0-9]+}}, align 16 +// CHECK: store <2 x i64> %{{[0-9]+}}, <2 x i64>* %{{[0-9]+}}, align 1 +// CHECK-LE: store <2 x i64> %{{[0-9]+}}, <2 x i64>* %{{[0-9]+}}, align 1 vec_xst(vd, sll, ad); -// CHECK: store <2 x double> %{{[0-9]+}}, <2 x double>* %{{[0-9]+}}, align 16 -// CHECK-LE: store <2 x double> %{{[0-9]+}}, <2 x double>* %{{[0-9]+}}, align 16 +// CHECK: store <2 x double> %{{[0-9]+}}, <2 x double>* %{{[0-9]+}}, align 1 +// CHECK-LE: store <2 x double> %{{[0-9]+}}, <2 x double>* %{{[0-9]+}}, align 1 res_vsll = vec_xl_be(sll, asll); -// CHECK: load <2 x i64>, <2 x i64>* %{{[0-9]+}}, align 16 +// CHECK: load <2 x i64>, <2 x i64>* %{{[0-9]+}}, align 1 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* 
%{{[0-9]+}}) res_vull = vec_xl_be(sll, aull); -// CHECK: load <2 x i64>, <2 x i64>* %{{[0-9]+}}, align 16 +// CHECK: load <2 x i64>, <2 x i64>* %{{[0-9]+}}, align 1 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) res_vd = vec_xl_be(sll, ad); -// CHECK: load <2 x double>, <2 x double>* %{{[0-9]+}}, align 16 +// CHECK: load <2 x double>, <2 x double>* %{{[0-9]+}}, align 1 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %{{[0-9]+}}) vec_xst_be(vsll, sll, asll); -// CHECK: store <2 x i64> %{{[0-9]+}}, <2 x i64>* %{{[0-9]+}}, align 16 +// CHECK: store <2 x i64> %{{[0-9]+}}, <2 x i64>* %{{[0-9]+}}, align 1 // CHECK-LE: call void @llvm.ppc.vsx.stxvd2x.be(<2 x double> %{{[0-9]+}}, i8* %{{[0-9]+}}) vec_xst_be(vull, sll, aull); -// CHECK: store <2 x i64> %{{[0-9]+}}, <2 x i64>* %{{[0-9]+}}, align 16 +// CHECK: store <2 x i64> %{{[0-9]+}}, <2 x i64>* %{{[0-9]+}}, align 1 // CHECK-LE: call void @llvm.ppc.vsx.stxvd2x.be(<2 x double> %{{[0-9]+}}, i8* %{{[0-9]+}}) vec_xst_be(vd, sll, ad); -// CHECK: store <2 x double> %{{[0-9]+}}, <2 x double>* %{{[0-9]+}}, align 16 +// CHECK: store <2 x double> %{{[0-9]+}}, <2 x double>* %{{[0-9]+}}, align 1 // CHECK-LE: call void @llvm.ppc.vsx.stxvd2x.be(<2 x double> %{{[0-9]+}}, i8* %{{[0-9]+}}) res_vf = vec_neg(vf); diff --git a/test/CoverageMapping/macros.c b/test/CoverageMapping/macros.c index 95fe37ed7e8d..39cd190b2a88 100644 --- a/test/CoverageMapping/macros.c +++ b/test/CoverageMapping/macros.c @@ -4,6 +4,7 @@ #define MACRO_2 bar() #define MACRO_1 return; MACRO_2 #define MACRO_3 MACRO_2 +#define GOTO goto void bar() {} @@ -56,6 +57,15 @@ void func5() { // CHECK-NEXT: File 0, [[@LINE]]:14 -> [[@LINE+4]]:2 = #0 // CHECK-NEXT: Expansion,File 1, 6:17 -> 6:24 = #1 // CHECK-NEXT: File 2, 4:17 -> 4:22 = #1 +// CHECK-NEXT: func6 +void func6(unsigned count) { // CHECK-NEXT: File 0, [[@LINE]]:28 -> [[@LINE+4]]:2 = #0 +begin: // CHECK-NEXT: File 0, [[@LINE]]:1 -> [[@LINE+3]]:2 = #1 + if (count--) // CHECK-NEXT: 
File 0, [[@LINE]]:9 -> [[@LINE]]:16 = #1 + GOTO begin; // CHECK-NEXT: File 0, [[@LINE]]:9 -> [[@LINE]]:19 = #2 +} +// CHECK-NEXT: Expansion,File 0, [[@LINE-2]]:9 -> [[@LINE-2]]:13 = #2 +// CHECK-NEXT: File 1, 7:14 -> 7:18 = #2 + int main(int argc, const char *argv[]) { func(); func2(); diff --git a/test/Driver/openbsd.c b/test/Driver/openbsd.c index d3846b7acf66..4aafa2f1f50c 100644 --- a/test/Driver/openbsd.c +++ b/test/Driver/openbsd.c @@ -112,3 +112,8 @@ // RUN: | FileCheck -check-prefix=CHECK-ARM-FLOAT-ABI %s // CHECK-ARM-FLOAT-ABI-NOT: "-target-feature" "+soft-float" // CHECK-ARM-FLOAT-ABI: "-target-feature" "+soft-float-abi" + +// Check PowerPC for Secure PLT +// RUN: %clang -target powerpc-unknown-openbsd -### -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-POWERPC-SECUREPLT %s +// CHECK-POWERPC-SECUREPLT: "-target-feature" "+secure-plt" diff --git a/test/Frontend/warning-stdlibcxx-darwin.cpp b/test/Frontend/warning-stdlibcxx-darwin.cpp index 3c132b6a833b..9f31373be7f8 100644 --- a/test/Frontend/warning-stdlibcxx-darwin.cpp +++ b/test/Frontend/warning-stdlibcxx-darwin.cpp @@ -1,5 +1,5 @@ // RUN: %clang -cc1 -triple arm64-apple-ios6.0.0 -isysroot %S/doesnotexist %s 2>&1 | FileCheck %s // RUN: %clang -cc1 -triple arm64-apple-ios6.0.0 -isysroot %S/doesnotexist -stdlib=libc++ %s -verify -// CHECK: include path for stdlibc++ headers not found; pass '-std=libc++' on the command line to use the libc++ standard library instead +// CHECK: include path for stdlibc++ headers not found; pass '-stdlib=libc++' on the command line to use the libc++ standard library instead // expected-no-diagnostics diff --git a/test/Sema/attr-ifunc.c b/test/Sema/attr-ifunc.c index af7a7e33da09..907b61c4451e 100644 --- a/test/Sema/attr-ifunc.c +++ b/test/Sema/attr-ifunc.c @@ -27,10 +27,6 @@ void f4_ifunc() {} void f4() __attribute__((ifunc("f4_ifunc"))); //expected-error@-1 {{ifunc resolver function must return a pointer}} -void* f5_ifunc(int i) { return 0; } -void f5() 
__attribute__((ifunc("f5_ifunc"))); -//expected-error@-1 {{ifunc resolver function must have no parameters}} - #else void f1a() __asm("f1"); void f1a() {} diff --git a/test/SemaCXX/sourceranges.cpp b/test/SemaCXX/sourceranges.cpp index 58772a063915..53f2f57e6754 100644 --- a/test/SemaCXX/sourceranges.cpp +++ b/test/SemaCXX/sourceranges.cpp @@ -52,6 +52,13 @@ void construct() { // CHECK: CXXConstructExpr {{0x[0-9a-fA-F]+}} 'D' 'void (int){{( __attribute__\(\(thiscall\)\))?}}' } +namespace PR38987 { +struct A { A(); }; +template <class T> void f() { T{}; } +template void f<A>(); +// CHECK: CXXTemporaryObjectExpr {{.*}} 'PR38987::A':'PR38987::A' +} + void abort() __attribute__((noreturn)); namespace std { diff --git a/test/SemaOpenCL/extension-begin.cl b/test/SemaOpenCL/extension-begin.cl index 92ea88143233..5f7e70dd778c 100644 --- a/test/SemaOpenCL/extension-begin.cl +++ b/test/SemaOpenCL/extension-begin.cl @@ -1,37 +1,29 @@ // Test this without pch. -// RUN: %clang_cc1 %s -DHEADER -DHEADER_USER -triple spir-unknown-unknown -verify -pedantic -fsyntax-only +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -verify -pedantic -fsyntax-only // Test with pch. 
-// RUN: %clang_cc1 %s -DHEADER -triple spir-unknown-unknown -emit-pch -o %t -verify -pedantic -// RUN: %clang_cc1 %s -DHEADER_USER -triple spir-unknown-unknown -include-pch %t -fsyntax-only -verify -pedantic +// RUN: %clang_cc1 -x cl %S/extension-begin.h -triple spir-unknown-unknown -emit-pch -o %t.pch -pedantic +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -include-pch %t.pch -DIMPLICIT_INCLUDE -DUSE_PCH -fsyntax-only -verify -pedantic -#if defined(HEADER) && !defined(INCLUDED) -#define INCLUDED +// Test with modules +// RUN: rm -rf %t.modules +// RUN: mkdir -p %t.modules +// +// RUN: %clang_cc1 -cl-std=CL1.2 -DIMPLICIT_INCLUDE -include %S/extension-begin.h -triple spir-unknown-unknown -O0 -emit-llvm -o - -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.modules %s -verify -pedantic +// +// RUN: rm -rf %t.modules +// RUN: mkdir -p %t.modules +// +// RUN: %clang_cc1 -cl-std=CL2.0 -DIMPLICIT_INCLUDE -include %S/extension-begin.h -triple spir-unknown-unknown -O0 -emit-llvm -o - -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.modules %s -verify -pedantic -#pragma OPENCL EXTENSION all : begin // expected-warning {{expected 'disable' - ignoring}} -#pragma OPENCL EXTENSION all : end // expected-warning {{expected 'disable' - ignoring}} - -#pragma OPENCL EXTENSION my_ext : begin - -struct A { - int a; -}; - -typedef struct A TypedefOfA; -typedef const TypedefOfA* PointerOfA; - -void f(void); - -__attribute__((overloadable)) void g(long x); - -#pragma OPENCL EXTENSION my_ext : end -#pragma OPENCL EXTENSION my_ext : end // expected-warning {{OpenCL extension end directive mismatches begin directive - ignoring}} - -__attribute__((overloadable)) void g(void); - -#endif // defined(HEADER) && !defined(INCLUDED) - -#ifdef HEADER_USER +#ifndef IMPLICIT_INCLUDE +#include "extension-begin.h" +#endif // IMPLICIT_INCLUDE +#ifndef USE_PCH +// expected-warning@extension-begin.h:4 {{expected 'disable' - ignoring}} +// expected-warning@extension-begin.h:5 
{{expected 'disable' - ignoring}} +// expected-warning@extension-begin.h:21 {{OpenCL extension end directive mismatches begin directive - ignoring}} +#endif // USE_PCH #pragma OPENCL EXTENSION my_ext : enable void test_f1(void) { @@ -48,9 +40,7 @@ void test_f2(void) { PointerOfA test_A_pointer; // expected-error {{use of type 'PointerOfA' (aka 'const struct A *') requires my_ext extension to be enabled}} f(); // expected-error {{use of declaration 'f' requires my_ext extension to be enabled}} g(0); // expected-error {{no matching function for call to 'g'}} - // expected-note@-26 {{candidate disabled due to OpenCL extension}} - // expected-note@-22 {{candidate function not viable: requires 0 arguments, but 1 was provided}} + // expected-note@extension-begin.h:18 {{candidate disabled due to OpenCL extension}} + // expected-note@extension-begin.h:23 {{candidate function not viable: requires 0 arguments, but 1 was provided}} } -#endif // HEADER_USER - diff --git a/test/SemaOpenCL/extension-begin.h b/test/SemaOpenCL/extension-begin.h new file mode 100644 index 000000000000..d9865ba0b33a --- /dev/null +++ b/test/SemaOpenCL/extension-begin.h @@ -0,0 +1,26 @@ +#ifndef INCLUDED +#define INCLUDED + +#pragma OPENCL EXTENSION all : begin +#pragma OPENCL EXTENSION all : end + +#pragma OPENCL EXTENSION my_ext : begin + +struct A { + int a; +}; + +typedef struct A TypedefOfA; +typedef const __private TypedefOfA* PointerOfA; + +void f(void); + +__attribute__((overloadable)) void g(long x); + +#pragma OPENCL EXTENSION my_ext : end +#pragma OPENCL EXTENSION my_ext : end + +__attribute__((overloadable)) void g(void); + +#endif // INCLUDED + diff --git a/tools/scan-build/bin/scan-build b/tools/scan-build/bin/scan-build index c50f900cc45e..88420ef94d57 100755 --- a/tools/scan-build/bin/scan-build +++ b/tools/scan-build/bin/scan-build @@ -1192,7 +1192,7 @@ OPTIONS: By default, the exit status of scan-build is the same as the executed build command. 
Specifying this option causes the exit status of scan-build to be 1 - if it found potential bugs and 0 otherwise. + if it found potential bugs and the exit status of the build itself otherwise. --use-cc [compiler path] --use-cc=[compiler path] @@ -1878,7 +1878,7 @@ if (defined $Options{OutputFormat}) { if ($Options{ExitStatusFoundBugs}) { exit 1 if ($NumBugs > 0); - exit 0; + exit $ExitStatus; } } } From 9508e63251dce5a919900af04d18710eefa2809f Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sat, 1 Dec 2018 15:41:59 +0000 Subject: [PATCH 3/6] Vendor import of lldb release_70 branch r348011: https://llvm.org/svn/llvm-project/lldb/branches/release_70@348011 --- source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp index f44b2bb97b2b..d26556d73e28 100644 --- a/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp +++ b/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp @@ -261,7 +261,11 @@ void DWARFUnit::ExtractDIEsRWLocked() { } if (!m_die_array.empty()) { - lldbassert(!m_first_die || m_first_die == m_die_array.front()); + if (m_first_die) { + // Only needed for the assertion. 
+ m_first_die.SetEmptyChildren(m_die_array.front().GetEmptyChildren()); + lldbassert(m_first_die == m_die_array.front()); + } m_first_die = m_die_array.front(); } From cfd499024808f4e0f3a349db451babd5a5ebf4e1 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sat, 8 Dec 2018 14:31:49 +0000 Subject: [PATCH 4/6] Vendor import of llvm release_70 branch r348686: https://llvm.org/svn/llvm-project/llvm/branches/release_70@348686 --- .../llvm/DebugInfo/PDB/Native/GlobalsStream.h | 2 - .../DebugInfo/PDB/Native/ModuleDebugStream.h | 2 +- include/llvm/ExecutionEngine/Orc/Core.h | 2 +- .../Orc/OrcRemoteTargetClient.h | 3 +- .../ProfileData/Coverage/CoverageMapping.h | 2 - lib/Analysis/MemorySSA.cpp | 1 - .../RuntimeDyld/RuntimeDyld.cpp | 2 +- lib/Target/AArch64/AArch64ISelLowering.cpp | 248 ++++++++++-------- lib/Target/AMDGPU/AMDGPULibFunc.cpp | 1 - .../InstCombine/InstCombineCompares.cpp | 8 + .../Instrumentation/DataFlowSanitizer.cpp | 6 +- test/CodeGen/AArch64/arm64-ccmp.ll | 72 ++++- ...t-low-bit-mask-and-icmp-sge-to-icmp-sle.ll | 56 +++- ...t-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll | 56 +++- 14 files changed, 307 insertions(+), 154 deletions(-) diff --git a/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h b/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h index fdc58dc60f7e..dd04b5c5681d 100644 --- a/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h +++ b/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h @@ -30,8 +30,6 @@ class GSIHashIterator GSIHashIterator, FixedStreamArrayIterator, std::random_access_iterator_tag, const uint32_t> { public: - GSIHashIterator() = default; - template GSIHashIterator(T &&v) : GSIHashIterator::iterator_adaptor_base(std::forward(v)) {} diff --git a/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h index 6602264d1b74..efc25e0559b9 100644 --- a/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h +++ b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h @@ -49,7 +49,7 @@ 
class ModuleDebugStreamRef { BinarySubstreamRef getC13LinesSubstream() const; BinarySubstreamRef getGlobalRefsSubstream() const; - ModuleDebugStreamRef &operator=(ModuleDebugStreamRef &&Other) = default; + ModuleDebugStreamRef &operator=(ModuleDebugStreamRef &&Other) = delete; iterator_range subsections() const; codeview::DebugSubsectionArray getSubsectionsArray() const { diff --git a/include/llvm/ExecutionEngine/Orc/Core.h b/include/llvm/ExecutionEngine/Orc/Core.h index fd03687cfc21..11d7c091947e 100644 --- a/include/llvm/ExecutionEngine/Orc/Core.h +++ b/include/llvm/ExecutionEngine/Orc/Core.h @@ -126,7 +126,7 @@ class MaterializationResponsibility { public: MaterializationResponsibility(MaterializationResponsibility &&) = default; MaterializationResponsibility & - operator=(MaterializationResponsibility &&) = default; + operator=(MaterializationResponsibility &&) = delete; /// Destruct a MaterializationResponsibility instance. In debug mode /// this asserts that all symbols being tracked have been either diff --git a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h index 739e5ba47c12..45f95f63e70f 100644 --- a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h +++ b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h @@ -70,8 +70,7 @@ class OrcRemoteTargetClient RemoteRTDyldMemoryManager & operator=(const RemoteRTDyldMemoryManager &) = delete; RemoteRTDyldMemoryManager(RemoteRTDyldMemoryManager &&) = default; - RemoteRTDyldMemoryManager & - operator=(RemoteRTDyldMemoryManager &&) = default; + RemoteRTDyldMemoryManager &operator=(RemoteRTDyldMemoryManager &&) = delete; uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, diff --git a/include/llvm/ProfileData/Coverage/CoverageMapping.h b/include/llvm/ProfileData/Coverage/CoverageMapping.h index ecb284d30de0..e820f71cb6d5 100644 --- a/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ 
b/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -641,8 +641,6 @@ class LineCoverageIterator this->operator++(); } - LineCoverageIterator &operator=(const LineCoverageIterator &R) = default; - bool operator==(const LineCoverageIterator &R) const { return &CD == &R.CD && Next == R.Next && Ended == R.Ended; } diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp index b38c0c4f1439..6e49a39926a2 100644 --- a/lib/Analysis/MemorySSA.cpp +++ b/lib/Analysis/MemorySSA.cpp @@ -119,7 +119,6 @@ class MemoryLocOrCall { public: bool IsCall = false; - MemoryLocOrCall() = default; MemoryLocOrCall(MemoryUseOrDef *MUD) : MemoryLocOrCall(MUD->getMemoryInst()) {} MemoryLocOrCall(const MemoryUseOrDef *MUD) diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 1189be599edd..76f5e5ead504 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -275,7 +275,7 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { uint64_t Size = I->getCommonSize(); if (!CommonAlign) CommonAlign = Align; - CommonSize += alignTo(CommonSize, Align) + Size; + CommonSize = alignTo(CommonSize, Align) + Size; CommonSymbolsToAllocate.push_back(*I); } } else diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index de762a7bb1d4..cfc7aa96d31f 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1515,39 +1515,50 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, /// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of /// a comparison. They set the NZCV flags to a predefined value if their /// predicate is false. 
This allows to express arbitrary conjunctions, for -/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B))))" +/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))" /// expressed as: /// cmp A /// ccmp B, inv(CB), CA /// check for CB flags /// -/// In general we can create code for arbitrary "... (and (and A B) C)" -/// sequences. We can also implement some "or" expressions, because "(or A B)" -/// is equivalent to "not (and (not A) (not B))" and we can implement some -/// negation operations: -/// We can negate the results of a single comparison by inverting the flags -/// used when the predicate fails and inverting the flags tested in the next -/// instruction; We can also negate the results of the whole previous -/// conditional compare sequence by inverting the flags tested in the next -/// instruction. However there is no way to negate the result of a partial -/// sequence. +/// This naturally lets us implement chains of AND operations with SETCC +/// operands. And we can even implement some other situations by transforming +/// them: +/// - We can implement (NEG SETCC) i.e. negating a single comparison by +/// negating the flags used in a CCMP/FCCMP operations. +/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations +/// by negating the flags we test for afterwards. i.e. +/// NEG (CMP CCMP CCCMP ...) can be implemented. +/// - Note that we can only ever negate all previously processed results. +/// What we can not implement by flipping the flags to test is a negation +/// of two sub-trees (because the negation affects all sub-trees emitted so +/// far, so the 2nd sub-tree we emit would also affect the first). 
+/// With those tools we can implement some OR operations: +/// - (OR (SETCC A) (SETCC B)) can be implemented via: +/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B))) +/// - After transforming OR to NEG/AND combinations we may be able to use NEG +/// elimination rules from earlier to implement the whole thing as a +/// CCMP/FCCMP chain. /// -/// Therefore on encountering an "or" expression we can negate the subtree on -/// one side and have to be able to push the negate to the leafs of the subtree -/// on the other side (see also the comments in code). As complete example: -/// "or (or (setCA (cmp A)) (setCB (cmp B))) -/// (and (setCC (cmp C)) (setCD (cmp D)))" -/// is transformed to -/// "not (and (not (and (setCC (cmp C)) (setCC (cmp D)))) -/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))" -/// and implemented as: +/// As complete example: +/// or (or (setCA (cmp A)) (setCB (cmp B))) +/// (and (setCC (cmp C)) (setCD (cmp D)))" +/// can be reassociated to: +/// or (and (setCC (cmp C)) setCD (cmp D)) +// (or (setCA (cmp A)) (setCB (cmp B))) +/// can be transformed to: +/// not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) +/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))" +/// which can be implemented as: /// cmp C /// ccmp D, inv(CD), CC /// ccmp A, CA, inv(CD) /// ccmp B, CB, inv(CA) /// check for CB flags -/// A counterexample is "or (and A B) (and C D)" which cannot be implemented -/// by conditional compare sequences. +/// +/// A counterexample is "or (and A B) (and C D)" which translates to +/// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))), we +/// can only implement 1 of the inner (not) operations, but not both! /// @{ /// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate. @@ -1585,14 +1596,23 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp); } -/// Returns true if @p Val is a tree of AND/OR/SETCC operations. 
-/// CanPushNegate is set to true if we can push a negate operation through -/// the tree in a was that we are left with AND operations and negate operations -/// at the leafs only. i.e. "not (or (or x y) z)" can be changed to -/// "and (and (not x) (not y)) (not z)"; "not (or (and x y) z)" cannot be -/// brought into such a form. -static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanNegate, - unsigned Depth = 0) { +/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be +/// expressed as a conjunction. See \ref AArch64CCMP. +/// \param CanNegate Set to true if we can negate the whole sub-tree just by +/// changing the conditions on the SETCC tests. +/// (this means we can call emitConjunctionRec() with +/// Negate==true on this sub-tree) +/// \param MustBeFirst Set to true if this subtree needs to be negated and we +/// cannot do the negation naturally. We are required to +/// emit the subtree first in this case. +/// \param WillNegate Is true if are called when the result of this +/// subexpression must be negated. This happens when the +/// outer expression is an OR. We can use this fact to know +/// that we have a double negation (or (or ...) ...) that +/// can be implemented for free. +static bool canEmitConjunction(const SDValue Val, bool &CanNegate, + bool &MustBeFirst, bool WillNegate, + unsigned Depth = 0) { if (!Val.hasOneUse()) return false; unsigned Opcode = Val->getOpcode(); @@ -1600,39 +1620,44 @@ static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanNegate, if (Val->getOperand(0).getValueType() == MVT::f128) return false; CanNegate = true; + MustBeFirst = false; return true; } // Protect against exponential runtime and stack overflow. 
if (Depth > 6) return false; if (Opcode == ISD::AND || Opcode == ISD::OR) { + bool IsOR = Opcode == ISD::OR; SDValue O0 = Val->getOperand(0); SDValue O1 = Val->getOperand(1); bool CanNegateL; - if (!isConjunctionDisjunctionTree(O0, CanNegateL, Depth+1)) + bool MustBeFirstL; + if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1)) return false; bool CanNegateR; - if (!isConjunctionDisjunctionTree(O1, CanNegateR, Depth+1)) + bool MustBeFirstR; + if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1)) return false; - if (Opcode == ISD::OR) { - // For an OR expression we need to be able to negate at least one side or - // we cannot do the transformation at all. + if (MustBeFirstL && MustBeFirstR) + return false; + + if (IsOR) { + // For an OR expression we need to be able to naturally negate at least + // one side or we cannot do the transformation at all. if (!CanNegateL && !CanNegateR) return false; - // We can however change a (not (or x y)) to (and (not x) (not y)) if we - // can negate the x and y subtrees. - CanNegate = CanNegateL && CanNegateR; + // If we the result of the OR will be negated and we can naturally negate + // the leafs, then this sub-tree as a whole negates naturally. + CanNegate = WillNegate && CanNegateL && CanNegateR; + // If we cannot naturally negate the whole sub-tree, then this must be + // emitted first. + MustBeFirst = !CanNegate; } else { - // If the operands are OR expressions then we finally need to negate their - // outputs, we can only do that for the operand with emitted last by - // negating OutCC, not for both operands. - bool NeedsNegOutL = O0->getOpcode() == ISD::OR; - bool NeedsNegOutR = O1->getOpcode() == ISD::OR; - if (NeedsNegOutL && NeedsNegOutR) - return false; - // We cannot negate an AND operation (it would become an OR), + assert(Opcode == ISD::AND && "Must be OR or AND"); + // We cannot naturally negate an AND operation. 
CanNegate = false; + MustBeFirst = MustBeFirstL || MustBeFirstR; } return true; } @@ -1645,11 +1670,9 @@ static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanNegate, /// and conditional compare operations. @returns an NZCV flags producing node /// and sets @p OutCC to the flags that should be tested or returns SDValue() if /// transformation was not possible. -/// On recursive invocations @p PushNegate may be set to true to have negation -/// effects pushed to the tree leafs; @p Predicate is an NZCV flag predicate -/// for the comparisons in the current subtree; @p Depth limits the search -/// depth to avoid stack overflow. -static SDValue emitConjunctionDisjunctionTreeRec(SelectionDAG &DAG, SDValue Val, +/// \p Negate is true if we want this sub-tree being negated just by changing +/// SETCC conditions. +static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate) { // We're at a tree leaf, produce a conditional comparison operation. @@ -1690,76 +1713,85 @@ static SDValue emitConjunctionDisjunctionTreeRec(SelectionDAG &DAG, SDValue Val, return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL, DAG); } - assert((Opcode == ISD::AND || (Opcode == ISD::OR && Val->hasOneUse())) && - "Valid conjunction/disjunction tree"); + assert(Val->hasOneUse() && "Valid conjunction/disjunction tree"); + + bool IsOR = Opcode == ISD::OR; - // Check if both sides can be transformed. 
SDValue LHS = Val->getOperand(0); + bool CanNegateL; + bool MustBeFirstL; + bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR); + assert(ValidL && "Valid conjunction/disjunction tree"); + (void)ValidL; + SDValue RHS = Val->getOperand(1); + bool CanNegateR; + bool MustBeFirstR; + bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR); + assert(ValidR && "Valid conjunction/disjunction tree"); + (void)ValidR; - // In case of an OR we need to negate our operands and the result. - // (A v B) <=> not(not(A) ^ not(B)) - bool NegateOpsAndResult = Opcode == ISD::OR; - // We can negate the results of all previous operations by inverting the - // predicate flags giving us a free negation for one side. The other side - // must be negatable by itself. - if (NegateOpsAndResult) { - // See which side we can negate. - bool CanNegateL; - bool isValidL = isConjunctionDisjunctionTree(LHS, CanNegateL); - assert(isValidL && "Valid conjunction/disjunction tree"); - (void)isValidL; - -#ifndef NDEBUG - bool CanNegateR; - bool isValidR = isConjunctionDisjunctionTree(RHS, CanNegateR); - assert(isValidR && "Valid conjunction/disjunction tree"); - assert((CanNegateL || CanNegateR) && "Valid conjunction/disjunction tree"); -#endif - - // Order the side which we cannot negate to RHS so we can emit it first. - if (!CanNegateL) - std::swap(LHS, RHS); - } else { - bool NeedsNegOutL = LHS->getOpcode() == ISD::OR; - assert((!NeedsNegOutL || RHS->getOpcode() != ISD::OR) && - "Valid conjunction/disjunction tree"); - // Order the side where we need to negate the output flags to RHS so it - // gets emitted first. - if (NeedsNegOutL) - std::swap(LHS, RHS); + // Swap sub-tree that must come first to the right side. + if (MustBeFirstL) { + assert(!MustBeFirstR && "Valid conjunction/disjunction tree"); + std::swap(LHS, RHS); + std::swap(CanNegateL, CanNegateR); + std::swap(MustBeFirstL, MustBeFirstR); } - // Emit RHS. 
If we want to negate the tree we only need to push a negate - // through if we are already in a PushNegate case, otherwise we can negate - // the "flags to test" afterwards. + bool NegateR; + bool NegateAfterR; + bool NegateL; + bool NegateAfterAll; + if (Opcode == ISD::OR) { + // Swap the sub-tree that we can negate naturally to the left. + if (!CanNegateL) { + assert(CanNegateR && "at least one side must be negatable"); + assert(!MustBeFirstR && "invalid conjunction/disjunction tree"); + assert(!Negate); + std::swap(LHS, RHS); + NegateR = false; + NegateAfterR = true; + } else { + // Negate the left sub-tree if possible, otherwise negate the result. + NegateR = CanNegateR; + NegateAfterR = !CanNegateR; + } + NegateL = true; + NegateAfterAll = !Negate; + } else { + assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree"); + assert(!Negate && "Valid conjunction/disjunction tree"); + + NegateL = false; + NegateR = false; + NegateAfterR = false; + NegateAfterAll = false; + } + + // Emit sub-trees. AArch64CC::CondCode RHSCC; - SDValue CmpR = emitConjunctionDisjunctionTreeRec(DAG, RHS, RHSCC, Negate, - CCOp, Predicate); - if (NegateOpsAndResult && !Negate) + SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate); + if (NegateAfterR) RHSCC = AArch64CC::getInvertedCondCode(RHSCC); - // Emit LHS. We may need to negate it. - SDValue CmpL = emitConjunctionDisjunctionTreeRec(DAG, LHS, OutCC, - NegateOpsAndResult, CmpR, - RHSCC); - // If we transformed an OR to and AND then we have to negate the result - // (or absorb the Negate parameter). - if (NegateOpsAndResult && !Negate) + SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC); + if (NegateAfterAll) OutCC = AArch64CC::getInvertedCondCode(OutCC); return CmpL; } -/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain -/// of CCMP/CFCMP ops. See @ref AArch64CCMP. -/// \see emitConjunctionDisjunctionTreeRec(). 
-static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val, - AArch64CC::CondCode &OutCC) { - bool CanNegate; - if (!isConjunctionDisjunctionTree(Val, CanNegate)) +/// Emit expression as a conjunction (a series of CCMP/CFCMP ops). +/// In some cases this is even possible with OR operations in the expression. +/// See \ref AArch64CCMP. +/// \see emitConjunctionRec(). +static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, + AArch64CC::CondCode &OutCC) { + bool DummyCanNegate; + bool DummyMustBeFirst; + if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false)) return SDValue(); - return emitConjunctionDisjunctionTreeRec(DAG, Val, OutCC, false, SDValue(), - AArch64CC::AL); + return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL); } /// @} @@ -1859,7 +1891,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, } if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) { - if ((Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC))) { + if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) { if ((CC == ISD::SETNE) ^ RHSC->isNullValue()) AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC); } diff --git a/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/lib/Target/AMDGPU/AMDGPULibFunc.cpp index 4671273d61f9..f37795e961e8 100644 --- a/lib/Target/AMDGPU/AMDGPULibFunc.cpp +++ b/lib/Target/AMDGPU/AMDGPULibFunc.cpp @@ -90,7 +90,6 @@ class UnmangledFuncInfo { public: using ID = AMDGPULibFunc::EFuncId; - UnmangledFuncInfo() = default; UnmangledFuncInfo(StringRef _Name, unsigned _NumArgs) : Name(_Name), NumArgs(_NumArgs) {} // Get index to Table by function name. 
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 6de92a4842ab..e1bae11b40d1 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2924,12 +2924,20 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I, // x & (-1 >> y) s>= x -> x s<= (-1 >> y) if (X != I.getOperand(1)) // X must be on RHS of comparison! return nullptr; // Ignore the other case. + if (!match(M, m_Constant())) // Can not do this fold with non-constant. + return nullptr; + if (!match(M, m_NonNegative())) // Must not have any -1 vector elements. + return nullptr; DstPred = ICmpInst::Predicate::ICMP_SLE; break; case ICmpInst::Predicate::ICMP_SLT: // x & (-1 >> y) s< x -> x s> (-1 >> y) if (X != I.getOperand(1)) // X must be on RHS of comparison! return nullptr; // Ignore the other case. + if (!match(M, m_Constant())) // Can not do this fold with non-constant. + return nullptr; + if (!match(M, m_NonNegative())) // Must not have any -1 vector elements. + return nullptr; DstPred = ICmpInst::Predicate::ICMP_SGT; break; case ICmpInst::Predicate::ICMP_SLE: diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index bb0e4379d1a8..f03fcc9c4e2c 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -231,17 +231,17 @@ struct TransformedFunction { TransformedFunction& operator=(TransformedFunction&&) = default; /// Type of the function before the transformation. - FunctionType* const OriginalType; + FunctionType *OriginalType; /// Type of the function after the transformation. - FunctionType* const TransformedType; + FunctionType *TransformedType; /// Transforming a function may change the position of arguments. This /// member records the mapping from each argument's old position to its new /// position. 
Argument positions are zero-indexed. If the transformation /// from F to F' made the first argument of F into the third argument of F', /// then ArgumentIndexMapping[0] will equal 2. - const std::vector ArgumentIndexMapping; + std::vector ArgumentIndexMapping; }; /// Given function attributes from a call site for the original function, diff --git a/test/CodeGen/AArch64/arm64-ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll index b18e638a3a94..6b497e8f7bfd 100644 --- a/test/CodeGen/AArch64/arm64-ccmp.ll +++ b/test/CodeGen/AArch64/arm64-ccmp.ll @@ -526,8 +526,8 @@ define i32 @select_or_olt_one(double %v0, double %v1, double %v2, double %v3, i3 ; CHECK-LABEL: select_or_one_olt: ; CHECK-LABEL: ; %bb.0: ; CHECK-NEXT: fcmp d0, d1 -; CHECK-NEXT: fccmp d0, d1, #1, ne -; CHECK-NEXT: fccmp d2, d3, #8, vs +; CHECK-NEXT: fccmp d0, d1, #8, le +; CHECK-NEXT: fccmp d2, d3, #8, pl ; CHECK-NEXT: csel w0, w0, w1, mi ; CHECK-NEXT: ret define i32 @select_or_one_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 { @@ -556,8 +556,8 @@ define i32 @select_or_olt_ueq(double %v0, double %v1, double %v2, double %v3, i3 ; CHECK-LABEL: select_or_ueq_olt: ; CHECK-LABEL: ; %bb.0: ; CHECK-NEXT: fcmp d0, d1 -; CHECK-NEXT: fccmp d0, d1, #8, le -; CHECK-NEXT: fccmp d2, d3, #8, mi +; CHECK-NEXT: fccmp d0, d1, #1, ne +; CHECK-NEXT: fccmp d2, d3, #8, vc ; CHECK-NEXT: csel w0, w0, w1, mi ; CHECK-NEXT: ret define i32 @select_or_ueq_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 { @@ -656,4 +656,68 @@ define i32 @f128_select_and_olt_oge(fp128 %v0, fp128 %v1, fp128 %v2, fp128 %v3, ret i32 %sel } +; This testcase resembles the core problem of http://llvm.org/PR39550 +; (an OR operation is 2 levels deep but needs to be implemented first) +; CHECK-LABEL: deep_or +; CHECK: cmp w2, #20 +; CHECK-NEXT: ccmp w2, #15, #4, ne +; CHECK-NEXT: ccmp w1, #0, #4, eq +; CHECK-NEXT: ccmp w0, #0, #4, ne +; CHECK-NEXT: csel w0, w4, w5, ne +; CHECK-NEXT: ret +define i32 @deep_or(i32 %a0, 
i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) { + %c0 = icmp ne i32 %a0, 0 + %c1 = icmp ne i32 %a1, 0 + %c2 = icmp eq i32 %a2, 15 + %c3 = icmp eq i32 %a2, 20 + + %or = or i1 %c2, %c3 + %and0 = and i1 %or, %c1 + %and1 = and i1 %and0, %c0 + %sel = select i1 %and1, i32 %x, i32 %y + ret i32 %sel +} + +; Variation of deep_or, we still need to implement the OR first though. +; CHECK-LABEL: deep_or1 +; CHECK: cmp w2, #20 +; CHECK-NEXT: ccmp w2, #15, #4, ne +; CHECK-NEXT: ccmp w0, #0, #4, eq +; CHECK-NEXT: ccmp w1, #0, #4, ne +; CHECK-NEXT: csel w0, w4, w5, ne +; CHECK-NEXT: ret +define i32 @deep_or1(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) { + %c0 = icmp ne i32 %a0, 0 + %c1 = icmp ne i32 %a1, 0 + %c2 = icmp eq i32 %a2, 15 + %c3 = icmp eq i32 %a2, 20 + + %or = or i1 %c2, %c3 + %and0 = and i1 %c0, %or + %and1 = and i1 %and0, %c1 + %sel = select i1 %and1, i32 %x, i32 %y + ret i32 %sel +} + +; Variation of deep_or, we still need to implement the OR first though. +; CHECK-LABEL: deep_or2 +; CHECK: cmp w2, #20 +; CHECK-NEXT: ccmp w2, #15, #4, ne +; CHECK-NEXT: ccmp w1, #0, #4, eq +; CHECK-NEXT: ccmp w0, #0, #4, ne +; CHECK-NEXT: csel w0, w4, w5, ne +; CHECK-NEXT: ret +define i32 @deep_or2(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) { + %c0 = icmp ne i32 %a0, 0 + %c1 = icmp ne i32 %a1, 0 + %c2 = icmp eq i32 %a2, 15 + %c3 = icmp eq i32 %a2, 20 + + %or = or i1 %c2, %c3 + %and0 = and i1 %c0, %c1 + %and1 = and i1 %and0, %or + %sel = select i1 %and1, i32 %x, i32 %y + ret i32 %sel +} + attributes #0 = { nounwind } diff --git a/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll b/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll index 7be784a452fd..ca1b86c0623a 100644 --- a/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll +++ b/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll @@ -23,18 +23,6 @@ define i1 
@p0(i8 %x) { ret i1 %ret } -define i1 @pv(i8 %x, i8 %y) { -; CHECK-LABEL: @pv( -; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp sge i8 [[TMP0]], [[X:%.*]] -; CHECK-NEXT: ret i1 [[TMP1]] -; - %tmp0 = lshr i8 -1, %y - %tmp1 = and i8 %tmp0, %x - %ret = icmp sge i8 %tmp1, %x - ret i1 %ret -} - ; ============================================================================ ; ; Vector tests ; ============================================================================ ; @@ -120,8 +108,9 @@ define i1 @cv0(i8 %y) { ; CHECK-LABEL: @cv0( ; CHECK-NEXT: [[X:%.*]] = call i8 @gen8() ; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp sle i8 [[X]], [[TMP0]] -; CHECK-NEXT: ret i1 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], [[TMP0]] +; CHECK-NEXT: [[RET:%.*]] = icmp sge i8 [[TMP1]], [[X]] +; CHECK-NEXT: ret i1 [[RET]] ; %x = call i8 @gen8() %tmp0 = lshr i8 -1, %y @@ -196,3 +185,42 @@ define <2 x i1> @n2(<2 x i8> %x) { %ret = icmp sge <2 x i8> %tmp0, %x ret <2 x i1> %ret } + +; ============================================================================ ; +; Potential miscompiles. 
+; ============================================================================ ; + +define i1 @nv(i8 %x, i8 %y) { +; CHECK-LABEL: @nv( +; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]] +; CHECK-NEXT: [[RET:%.*]] = icmp sge i8 [[TMP1]], [[X]] +; CHECK-NEXT: ret i1 [[RET]] +; + %tmp0 = lshr i8 -1, %y + %tmp1 = and i8 %tmp0, %x + %ret = icmp sge i8 %tmp1, %x + ret i1 %ret +} + +define <2 x i1> @n3_vec(<2 x i8> %x) { +; CHECK-LABEL: @n3_vec( +; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp sge <2 x i8> [[TMP0]], [[X]] +; CHECK-NEXT: ret <2 x i1> [[RET]] +; + %tmp0 = and <2 x i8> %x, + %ret = icmp sge <2 x i8> %tmp0, %x + ret <2 x i1> %ret +} + +define <3 x i1> @n4_vec(<3 x i8> %x) { +; CHECK-LABEL: @n4_vec( +; CHECK-NEXT: [[TMP0:%.*]] = and <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp sge <3 x i8> [[TMP0]], [[X]] +; CHECK-NEXT: ret <3 x i1> [[RET]] +; + %tmp0 = and <3 x i8> %x, + %ret = icmp sge <3 x i8> %tmp0, %x + ret <3 x i1> %ret +} diff --git a/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll b/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll index d1792d1e075b..2957ad5731c7 100644 --- a/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll +++ b/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll @@ -23,18 +23,6 @@ define i1 @p0(i8 %x) { ret i1 %ret } -define i1 @pv(i8 %x, i8 %y) { -; CHECK-LABEL: @pv( -; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[TMP0]], [[X:%.*]] -; CHECK-NEXT: ret i1 [[TMP1]] -; - %tmp0 = lshr i8 -1, %y - %tmp1 = and i8 %tmp0, %x - %ret = icmp slt i8 %tmp1, %x - ret i1 %ret -} - ; ============================================================================ ; ; Vector tests ; 
============================================================================ ; @@ -120,8 +108,9 @@ define i1 @cv0(i8 %y) { ; CHECK-LABEL: @cv0( ; CHECK-NEXT: [[X:%.*]] = call i8 @gen8() ; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[X]], [[TMP0]] -; CHECK-NEXT: ret i1 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X]], [[TMP0]] +; CHECK-NEXT: [[RET:%.*]] = icmp slt i8 [[TMP1]], [[X]] +; CHECK-NEXT: ret i1 [[RET]] ; %x = call i8 @gen8() %tmp0 = lshr i8 -1, %y @@ -196,3 +185,42 @@ define <2 x i1> @n2(<2 x i8> %x) { %ret = icmp slt <2 x i8> %tmp0, %x ret <2 x i1> %ret } + +; ============================================================================ ; +; Potential miscompiles. +; ============================================================================ ; + +define i1 @nv(i8 %x, i8 %y) { +; CHECK-LABEL: @nv( +; CHECK-NEXT: [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]] +; CHECK-NEXT: [[RET:%.*]] = icmp slt i8 [[TMP1]], [[X]] +; CHECK-NEXT: ret i1 [[RET]] +; + %tmp0 = lshr i8 -1, %y + %tmp1 = and i8 %tmp0, %x + %ret = icmp slt i8 %tmp1, %x + ret i1 %ret +} + +define <2 x i1> @n3(<2 x i8> %x) { +; CHECK-LABEL: @n3( +; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp slt <2 x i8> [[TMP0]], [[X]] +; CHECK-NEXT: ret <2 x i1> [[RET]] +; + %tmp0 = and <2 x i8> %x, + %ret = icmp slt <2 x i8> %tmp0, %x + ret <2 x i1> %ret +} + +define <3 x i1> @n4(<3 x i8> %x) { +; CHECK-LABEL: @n4( +; CHECK-NEXT: [[TMP0:%.*]] = and <3 x i8> [[X:%.*]], +; CHECK-NEXT: [[RET:%.*]] = icmp slt <3 x i8> [[TMP0]], [[X]] +; CHECK-NEXT: ret <3 x i1> [[RET]] +; + %tmp0 = and <3 x i8> %x, + %ret = icmp slt <3 x i8> %tmp0, %x + ret <3 x i1> %ret +} From 52c5eb8567b3104e357ad43927aa605be3246c6f Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sat, 8 Dec 2018 14:31:58 +0000 Subject: [PATCH 5/6] Vendor import of clang release_70 branch r348686: 
https://llvm.org/svn/llvm-project/cfe/branches/release_70@348686 --- include/clang/AST/DeclBase.h | 1 - include/clang/Basic/Attr.td | 33 ++++++ lib/AST/ASTContext.cpp | 4 + lib/AST/Decl.cpp | 24 +++- lib/CodeGen/CodeGenFunction.cpp | 82 ++++--------- lib/CodeGen/CodeGenFunction.h | 39 +++--- lib/CodeGen/CodeGenModule.cpp | 73 ++++++++++-- lib/Driver/ToolChains/OpenBSD.cpp | 2 +- lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp | 7 +- .../svalbuilder-rearrange-comparisons.c | 14 +++ test/CodeGen/attr-cpuspecific.c | 9 ++ test/CodeGenCXX/attr-cpuspecific.cpp | 18 +++ .../attr-target-mv-member-funcs.cpp | 26 ++-- .../visibility-inlines-hidden-staticvar.cpp | 111 ++++++++++++++++++ test/Driver/openbsd.c | 2 +- 15 files changed, 329 insertions(+), 116 deletions(-) create mode 100644 test/CodeGenCXX/attr-cpuspecific.cpp create mode 100644 test/CodeGenCXX/visibility-inlines-hidden-staticvar.cpp diff --git a/include/clang/AST/DeclBase.h b/include/clang/AST/DeclBase.h index 81df1c0b6aa9..cc3072c8b2be 100644 --- a/include/clang/AST/DeclBase.h +++ b/include/clang/AST/DeclBase.h @@ -1216,7 +1216,6 @@ class DeclContextLookupResult { value_type SingleElement; public: - iterator() = default; explicit iterator(pointer Pos, value_type Single = nullptr) : IteratorBase(Pos), SingleElement(Single) {} diff --git a/include/clang/Basic/Attr.td b/include/clang/Basic/Attr.td index 1f17819dba74..dc9edace50d4 100644 --- a/include/clang/Basic/Attr.td +++ b/include/clang/Basic/Attr.td @@ -1953,6 +1953,39 @@ def Target : InheritableAttr { return parse(getFeaturesStr()); } + StringRef getArchitecture() const { + StringRef Features = getFeaturesStr(); + if (Features == "default") return {}; + + SmallVector AttrFeatures; + Features.split(AttrFeatures, ","); + + for (auto &Feature : AttrFeatures) { + Feature = Feature.trim(); + if (Feature.startswith("arch=")) + return Feature.drop_front(sizeof("arch=") - 1); + } + return ""; + } + + // Gets the list of features as simple string-refs with no +/- or 
'no-'. + // Only adds the items to 'Out' that are additions. + void getAddedFeatures(llvm::SmallVectorImpl &Out) const { + StringRef Features = getFeaturesStr(); + if (Features == "default") return; + + SmallVector AttrFeatures; + Features.split(AttrFeatures, ","); + + for (auto &Feature : AttrFeatures) { + Feature = Feature.trim(); + + if (!Feature.startswith("no-") && !Feature.startswith("arch=") && + !Feature.startswith("fpmath=") && !Feature.startswith("tune=")) + Out.push_back(Feature); + } + } + template ParsedTargetAttr parse(Compare cmp) const { ParsedTargetAttr Attrs = parse(); diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp index c085f52cae31..648fa9f0fbc6 100644 --- a/lib/AST/ASTContext.cpp +++ b/lib/AST/ASTContext.cpp @@ -9734,6 +9734,10 @@ bool ASTContext::DeclMustBeEmitted(const Decl *D) { return true; if (const auto *FD = dyn_cast(D)) { + // Multiversioned functions always have to be emitted, because they are used + // by the resolver. + if (FD->isMultiVersion()) + return true; // Forward declarations aren't required. if (!FD->doesThisDeclarationHaveABody()) return FD->doesDeclarationForceExternallyVisibleDefinition(); diff --git a/lib/AST/Decl.cpp b/lib/AST/Decl.cpp index 8030dd0c2f41..1bf01b901232 100644 --- a/lib/AST/Decl.cpp +++ b/lib/AST/Decl.cpp @@ -725,7 +725,7 @@ LinkageComputer::getLVForNamespaceScopeDecl(const NamedDecl *D, // If we're paying attention to global visibility, apply // -finline-visibility-hidden if this is an inline method. if (useInlineVisibilityHidden(D)) - LV.mergeVisibility(HiddenVisibility, true); + LV.mergeVisibility(HiddenVisibility, /*visibilityExplicit=*/false); } } @@ -915,7 +915,7 @@ LinkageComputer::getLVForClassMember(const NamedDecl *D, // Note that we do this before merging information about // the class visibility. 
if (!LV.isVisibilityExplicit() && useInlineVisibilityHidden(D)) - LV.mergeVisibility(HiddenVisibility, true); + LV.mergeVisibility(HiddenVisibility, /*visibilityExplicit=*/false); } // If this class member has an explicit visibility attribute, the only @@ -1262,7 +1262,27 @@ LinkageInfo LinkageComputer::getLVForLocalDecl(const NamedDecl *D, !isTemplateInstantiation(FD->getTemplateSpecializationKind())) return LinkageInfo::none(); + // If a function is hidden by -fvisibility-inlines-hidden option and + // is not explicitly attributed as a hidden function, + // we should not make static local variables in the function hidden. LV = getLVForDecl(FD, computation); + if (isa(D) && useInlineVisibilityHidden(FD) && + !LV.isVisibilityExplicit()) { + assert(cast(D)->isStaticLocal()); + // If this was an implicitly hidden inline method, check again for + // explicit visibility on the parent class, and use that for static locals + // if present. + if (const auto *MD = dyn_cast(FD)) + LV = getLVForDecl(MD->getParent(), computation); + if (!LV.isVisibilityExplicit()) { + Visibility globalVisibility = + computation.isValueVisibility() + ? 
Context.getLangOpts().getValueVisibilityMode() + : Context.getLangOpts().getTypeVisibilityMode(); + return LinkageInfo(VisibleNoLinkage, globalVisibility, + /*visibilityExplicit=*/false); + } + } } if (!isExternallyVisible(LV.getLinkage())) return LinkageInfo::none(); diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp index 3c582688e91e..166d588dd55b 100644 --- a/lib/CodeGen/CodeGenFunction.cpp +++ b/lib/CodeGen/CodeGenFunction.cpp @@ -2359,91 +2359,53 @@ void CodeGenFunction::EmitSanitizerStatReport(llvm::SanitizerStatKind SSK) { CGM.getSanStats().create(IRB, SSK); } -llvm::Value *CodeGenFunction::FormResolverCondition( - const TargetMultiVersionResolverOption &RO) { - llvm::Value *TrueCondition = nullptr; - if (!RO.ParsedAttribute.Architecture.empty()) - TrueCondition = EmitX86CpuIs(RO.ParsedAttribute.Architecture); +llvm::Value * +CodeGenFunction::FormResolverCondition(const MultiVersionResolverOption &RO) { + llvm::Value *Condition = nullptr; - if (!RO.ParsedAttribute.Features.empty()) { - SmallVector FeatureList; - llvm::for_each(RO.ParsedAttribute.Features, - [&FeatureList](const std::string &Feature) { - FeatureList.push_back(StringRef{Feature}.substr(1)); - }); - llvm::Value *FeatureCmp = EmitX86CpuSupports(FeatureList); - TrueCondition = TrueCondition ? Builder.CreateAnd(TrueCondition, FeatureCmp) - : FeatureCmp; + if (!RO.Conditions.Architecture.empty()) + Condition = EmitX86CpuIs(RO.Conditions.Architecture); + + if (!RO.Conditions.Features.empty()) { + llvm::Value *FeatureCond = EmitX86CpuSupports(RO.Conditions.Features); + Condition = + Condition ? 
Builder.CreateAnd(Condition, FeatureCond) : FeatureCond; } - return TrueCondition; + return Condition; } -void CodeGenFunction::EmitTargetMultiVersionResolver( - llvm::Function *Resolver, - ArrayRef Options) { +void CodeGenFunction::EmitMultiVersionResolver( + llvm::Function *Resolver, ArrayRef Options) { assert((getContext().getTargetInfo().getTriple().getArch() == llvm::Triple::x86 || getContext().getTargetInfo().getTriple().getArch() == llvm::Triple::x86_64) && "Only implemented for x86 targets"); - - // Main function's basic block. - llvm::BasicBlock *CurBlock = createBasicBlock("entry", Resolver); - Builder.SetInsertPoint(CurBlock); - EmitX86CpuInit(); - - llvm::Function *DefaultFunc = nullptr; - for (const TargetMultiVersionResolverOption &RO : Options) { - Builder.SetInsertPoint(CurBlock); - llvm::Value *TrueCondition = FormResolverCondition(RO); - - if (!TrueCondition) { - DefaultFunc = RO.Function; - } else { - llvm::BasicBlock *RetBlock = createBasicBlock("ro_ret", Resolver); - llvm::IRBuilder<> RetBuilder(RetBlock); - RetBuilder.CreateRet(RO.Function); - CurBlock = createBasicBlock("ro_else", Resolver); - Builder.CreateCondBr(TrueCondition, RetBlock, CurBlock); - } - } - - assert(DefaultFunc && "No default version?"); - // Emit return from the 'else-ist' block. - Builder.SetInsertPoint(CurBlock); - Builder.CreateRet(DefaultFunc); -} - -void CodeGenFunction::EmitCPUDispatchMultiVersionResolver( - llvm::Function *Resolver, - ArrayRef Options) { - assert((getContext().getTargetInfo().getTriple().getArch() == - llvm::Triple::x86 || - getContext().getTargetInfo().getTriple().getArch() == - llvm::Triple::x86_64) && - "Only implemented for x86 targets"); - // Main function's basic block. 
llvm::BasicBlock *CurBlock = createBasicBlock("resolver_entry", Resolver); Builder.SetInsertPoint(CurBlock); EmitX86CpuInit(); - for (const CPUDispatchMultiVersionResolverOption &RO : Options) { + for (const MultiVersionResolverOption &RO : Options) { Builder.SetInsertPoint(CurBlock); + llvm::Value *Condition = FormResolverCondition(RO); - // "generic" case should catch-all. - if (RO.FeatureMask == 0) { + // The 'default' or 'generic' case. + if (!Condition) { + assert(&RO == Options.end() - 1 && + "Default or Generic case must be last"); Builder.CreateRet(RO.Function); return; } + llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver); llvm::IRBuilder<> RetBuilder(RetBlock); RetBuilder.CreateRet(RO.Function); CurBlock = createBasicBlock("resolver_else", Resolver); - llvm::Value *TrueCondition = EmitX86CpuSupports(RO.FeatureMask); - Builder.CreateCondBr(TrueCondition, RetBlock, CurBlock); + Builder.CreateCondBr(Condition, RetBlock, CurBlock); } + // If no generic/default, emit an unreachable. 
Builder.SetInsertPoint(CurBlock); llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap); TrapCall->setDoesNotReturn(); diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 878923a85bdf..d374f3943661 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -4247,30 +4247,26 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitSanitizerStatReport(llvm::SanitizerStatKind SSK); - struct TargetMultiVersionResolverOption { + struct MultiVersionResolverOption { llvm::Function *Function; - TargetAttr::ParsedTargetAttr ParsedAttribute; - unsigned Priority; - TargetMultiVersionResolverOption( - const TargetInfo &TargInfo, llvm::Function *F, - const clang::TargetAttr::ParsedTargetAttr &PT) - : Function(F), ParsedAttribute(PT), Priority(0u) { - for (StringRef Feat : PT.Features) - Priority = std::max(Priority, - TargInfo.multiVersionSortPriority(Feat.substr(1))); + struct Conds { + StringRef Architecture; + llvm::SmallVector Features; - if (!PT.Architecture.empty()) - Priority = std::max(Priority, - TargInfo.multiVersionSortPriority(PT.Architecture)); - } + Conds(StringRef Arch, ArrayRef Feats) + : Architecture(Arch), Features(Feats.begin(), Feats.end()) {} + } Conditions; - bool operator>(const TargetMultiVersionResolverOption &Other) const { - return Priority > Other.Priority; - } + MultiVersionResolverOption(llvm::Function *F, StringRef Arch, + ArrayRef Feats) + : Function(F), Conditions(Arch, Feats) {} }; - void EmitTargetMultiVersionResolver( - llvm::Function *Resolver, - ArrayRef Options); + + // Emits the body of a multiversion function's resolver. Assumes that the + // options are already sorted in the proper order, with the 'default' option + // last (if it exists). 
+ void EmitMultiVersionResolver(llvm::Function *Resolver, + ArrayRef Options); struct CPUDispatchMultiVersionResolverOption { llvm::Function *Function; @@ -4306,8 +4302,7 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *EmitX86CpuSupports(ArrayRef FeatureStrs); llvm::Value *EmitX86CpuSupports(uint32_t Mask); llvm::Value *EmitX86CpuInit(); - llvm::Value * - FormResolverCondition(const TargetMultiVersionResolverOption &RO); + llvm::Value *FormResolverCondition(const MultiVersionResolverOption &RO); }; inline DominatingLLVMValue::saved_type diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 76112e191c71..3e33735c5040 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -2399,9 +2399,22 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, llvm::Function *NewFn); +static unsigned +TargetMVPriority(const TargetInfo &TI, + const CodeGenFunction::MultiVersionResolverOption &RO) { + unsigned Priority = 0; + for (StringRef Feat : RO.Conditions.Features) + Priority = std::max(Priority, TI.multiVersionSortPriority(Feat)); + + if (!RO.Conditions.Architecture.empty()) + Priority = std::max( + Priority, TI.multiVersionSortPriority(RO.Conditions.Architecture)); + return Priority; +} + void CodeGenModule::emitMultiVersionFunctions() { for (GlobalDecl GD : MultiVersionFuncs) { - SmallVector Options; + SmallVector Options; const FunctionDecl *FD = cast(GD.getDecl()); getContext().forEachMultiversionedFunctionVersion( FD, [this, &GD, &Options](const FunctionDecl *CurFD) { @@ -2422,8 +2435,13 @@ void CodeGenModule::emitMultiVersionFunctions() { } assert(Func && "This should have just been created"); } - Options.emplace_back(getTarget(), cast(Func), - CurFD->getAttr()->parse()); + + const auto *TA = CurFD->getAttr(); + llvm::SmallVector Feats; + TA->getAddedFeatures(Feats); + + 
Options.emplace_back(cast(Func), + TA->getArchitecture(), Feats); }); llvm::Function *ResolverFunc = cast( @@ -2431,11 +2449,16 @@ void CodeGenModule::emitMultiVersionFunctions() { if (supportsCOMDAT()) ResolverFunc->setComdat( getModule().getOrInsertComdat(ResolverFunc->getName())); + + const TargetInfo &TI = getTarget(); std::stable_sort( Options.begin(), Options.end(), - std::greater()); + [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS, + const CodeGenFunction::MultiVersionResolverOption &RHS) { + return TargetMVPriority(TI, LHS) > TargetMVPriority(TI, RHS); + }); CodeGenFunction CGF(*this); - CGF.EmitTargetMultiVersionResolver(ResolverFunc, Options); + CGF.EmitMultiVersionResolver(ResolverFunc, Options); } } @@ -2444,7 +2467,13 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { assert(FD && "Not a FunctionDecl?"); const auto *DD = FD->getAttr(); assert(DD && "Not a cpu_dispatch Function?"); - llvm::Type *DeclTy = getTypes().ConvertTypeForMem(FD->getType()); + QualType CanonTy = Context.getCanonicalType(FD->getType()); + llvm::Type *DeclTy = getTypes().ConvertFunctionType(CanonTy, FD); + + if (const auto *CXXFD = dyn_cast(FD)) { + const CGFunctionInfo &FInfo = getTypes().arrangeCXXMethodDeclaration(CXXFD); + DeclTy = getTypes().GetFunctionType(FInfo); + } StringRef ResolverName = getMangledName(GD); llvm::Type *ResolverType = llvm::FunctionType::get( @@ -2455,15 +2484,14 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{}, /*ForVTable=*/false)); - SmallVector - Options; + SmallVector Options; const TargetInfo &Target = getTarget(); for (const IdentifierInfo *II : DD->cpus()) { // Get the name of the target function so we can look it up/create it. 
std::string MangledName = getMangledNameImpl(*this, GD, FD, true) + getCPUSpecificMangling(*this, II->getName()); llvm::Constant *Func = GetOrCreateLLVMFunction( - MangledName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/false, + MangledName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/true, /*IsThunk=*/false, llvm::AttributeList(), ForDefinition); llvm::SmallVector Features; Target.getCPUSpecificCPUDispatchFeatures(II->getName(), Features); @@ -2473,15 +2501,34 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { Features.begin(), Features.end(), [&Target](StringRef Feat) { return !Target.validateCpuSupports(Feat); }), Features.end()); - Options.emplace_back(cast(Func), - CodeGenFunction::GetX86CpuSupportsMask(Features)); + Options.emplace_back(cast(Func), StringRef{}, Features); } llvm::sort( Options.begin(), Options.end(), - std::greater()); + [](const CodeGenFunction::MultiVersionResolverOption &LHS, + const CodeGenFunction::MultiVersionResolverOption &RHS) { + return CodeGenFunction::GetX86CpuSupportsMask(LHS.Conditions.Features) > + CodeGenFunction::GetX86CpuSupportsMask(RHS.Conditions.Features); + }); + + // If the list contains multiple 'default' versions, such as when it contains + // 'pentium' and 'generic', don't emit the call to the generic one (since we + // always run on at least a 'pentium'). We do this by deleting the 'least + // advanced' (read, lowest mangling letter). 
+ while (Options.size() > 1 && + CodeGenFunction::GetX86CpuSupportsMask( + (Options.end() - 2)->Conditions.Features) == 0) { + StringRef LHSName = (Options.end() - 2)->Function->getName(); + StringRef RHSName = (Options.end() - 1)->Function->getName(); + if (LHSName.compare(RHSName) < 0) + Options.erase(Options.end() - 2); + else + Options.erase(Options.end() - 1); + } + CodeGenFunction CGF(*this); - CGF.EmitCPUDispatchMultiVersionResolver(ResolverFunc, Options); + CGF.EmitMultiVersionResolver(ResolverFunc, Options); } /// If an ifunc for the specified mangled name is not in the module, create and diff --git a/lib/Driver/ToolChains/OpenBSD.cpp b/lib/Driver/ToolChains/OpenBSD.cpp index 7b98cd62bbfc..432f6079e387 100644 --- a/lib/Driver/ToolChains/OpenBSD.cpp +++ b/lib/Driver/ToolChains/OpenBSD.cpp @@ -138,7 +138,7 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (Args.hasArg(options::OPT_pie)) CmdArgs.push_back("-pie"); - if (Args.hasArg(options::OPT_nopie)) + if (Args.hasArg(options::OPT_nopie) || Args.hasArg(options::OPT_pg)) CmdArgs.push_back("-nopie"); if (Output.isFilename()) { diff --git a/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp index 62c54fc956a9..da9dd3406e14 100644 --- a/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -475,9 +475,6 @@ static Optional tryRearrange(ProgramStateRef State, SingleTy = ResultTy; if (LSym->getType() != SingleTy) return None; - // Substracting unsigned integers is a nightmare. - if (!SingleTy->isSignedIntegerOrEnumerationType()) - return None; } else { // Don't rearrange other operations. 
return None; @@ -485,6 +482,10 @@ static Optional tryRearrange(ProgramStateRef State, assert(!SingleTy.isNull() && "We should have figured out the type by now!"); + // Rearrange signed symbolic expressions only + if (!SingleTy->isSignedIntegerOrEnumerationType()) + return None; + SymbolRef RSym = Rhs.getAsSymbol(); if (!RSym || RSym->getType() != SingleTy) return None; diff --git a/test/Analysis/svalbuilder-rearrange-comparisons.c b/test/Analysis/svalbuilder-rearrange-comparisons.c index ac186120fe11..ca2099cf7fda 100644 --- a/test/Analysis/svalbuilder-rearrange-comparisons.c +++ b/test/Analysis/svalbuilder-rearrange-comparisons.c @@ -934,3 +934,17 @@ int mixed_integer_types(int x, int y) { short a = x - 1U; return a - y; } + +unsigned gu(); +unsigned fu() { + unsigned x = gu(); + // Assert that no overflows occur in this test file. + // Assuming that concrete integers are also within that range. + assert(x <= ((unsigned)UINT_MAX / 4)); + return x; +} + +void unsigned_concrete_int_no_crash() { + unsigned x = fu() + 1U, y = fu() + 1U; + clang_analyzer_dump(x == y); // expected-warning {{((conj_$2{unsigned int}) + 1U) == ((conj_$7{unsigned int}) + 1U)}} +} diff --git a/test/CodeGen/attr-cpuspecific.c b/test/CodeGen/attr-cpuspecific.c index 1b98b5dc9678..6005456f0b29 100644 --- a/test/CodeGen/attr-cpuspecific.c +++ b/test/CodeGen/attr-cpuspecific.c @@ -96,6 +96,15 @@ void HasGeneric(void); // CHECK: ret void ()* @HasGeneric.A // CHECK-NOT: call void @llvm.trap +__attribute__((cpu_dispatch(atom, generic, pentium))) +int GenericAndPentium(int i, double d); +// CHECK: define i32 (i32, double)* @GenericAndPentium.resolver() +// CHECK: call void @__cpu_indicator_init +// CHECK: ret i32 (i32, double)* @GenericAndPentium.O +// CHECK: ret i32 (i32, double)* @GenericAndPentium.B +// CHECK-NOT: ret i32 (i32, double)* @GenericAndPentium.A +// CHECK-NOT: call void @llvm.trap + // CHECK: attributes #[[S]] = 
{{.*}}"target-features"="+avx,+cmov,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" // CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" // CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87" diff --git a/test/CodeGenCXX/attr-cpuspecific.cpp b/test/CodeGenCXX/attr-cpuspecific.cpp new file mode 100644 index 000000000000..d9b7b54e7a7d --- /dev/null +++ b/test/CodeGenCXX/attr-cpuspecific.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LINUX + +struct S { + __attribute__((cpu_specific(atom))) + void Func(){} + __attribute__((cpu_dispatch(ivybridge,atom))) + void Func(){} +}; + +void foo() { + S s; + s.Func(); +} + +// LINUX: define linkonce_odr void @_ZN1S4FuncEv.O +// LINUX: define void (%struct.S*)* @_ZN1S4FuncEv.resolver +// LINUX: ret void (%struct.S*)* @_ZN1S4FuncEv.S +// LINUX: ret void (%struct.S*)* @_ZN1S4FuncEv.O diff --git a/test/CodeGenCXX/attr-target-mv-member-funcs.cpp b/test/CodeGenCXX/attr-target-mv-member-funcs.cpp index 622b738ad284..3d24c7a04d18 100644 --- a/test/CodeGenCXX/attr-target-mv-member-funcs.cpp +++ b/test/CodeGenCXX/attr-target-mv-member-funcs.cpp @@ -72,6 +72,15 @@ int templ_use() { // CHECK: @_ZN5templIiE3fooEi.ifunc = ifunc i32 (%struct.templ*, i32), i32 (%struct.templ*, i32)* ()* @_ZN5templIiE3fooEi.resolver // CHECK: @_ZN5templIdE3fooEi.ifunc = ifunc i32 (%struct.templ.0*, i32), i32 (%struct.templ.0*, i32)* ()* @_ZN5templIdE3fooEi.resolver +// CHECK: define linkonce_odr i32 @_ZN1S3fooEi.sse4.2(%struct.S* %this, i32) +// CHECK: ret i32 0 + +// CHECK: define linkonce_odr i32 @_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32) +// CHECK: ret i32 1 + +// CHECK: define linkonce_odr i32 @_ZN1S3fooEi(%struct.S* %this, i32) +// CHECK: ret i32 2 
+ // CHECK: define i32 @_Z3barv() // CHECK: %s = alloca %struct.S, align 1 // CHECK: %s2 = alloca %struct.S, align 1 @@ -123,23 +132,14 @@ int templ_use() { // CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.sse4.2 // CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi -// CHECK: define linkonce_odr i32 @_ZN1S3fooEi.sse4.2(%struct.S* %this, i32) -// CHECK: ret i32 0 - -// CHECK: declare i32 @_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32) - -// CHECK: define linkonce_odr i32 @_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32) -// CHECK: ret i32 1 - -// CHECK: define linkonce_odr i32 @_ZN1S3fooEi(%struct.S* %this, i32) -// CHECK: ret i32 2 - // CHECK: define linkonce_odr i32 @_ZN5templIiE3fooEi.sse4.2 -// CHECK: declare i32 @_ZN5templIiE3fooEi.arch_sandybridge // CHECK: define linkonce_odr i32 @_ZN5templIiE3fooEi.arch_ivybridge // CHECK: define linkonce_odr i32 @_ZN5templIiE3fooEi // CHECK: define linkonce_odr i32 @_ZN5templIdE3fooEi.sse4.2 -// CHECK: declare i32 @_ZN5templIdE3fooEi.arch_sandybridge // CHECK: define linkonce_odr i32 @_ZN5templIdE3fooEi.arch_ivybridge // CHECK: define linkonce_odr i32 @_ZN5templIdE3fooEi +// +// CHECK: declare i32 @_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32) +// CHECK: declare i32 @_ZN5templIiE3fooEi.arch_sandybridge +// CHECK: declare i32 @_ZN5templIdE3fooEi.arch_sandybridge diff --git a/test/CodeGenCXX/visibility-inlines-hidden-staticvar.cpp b/test/CodeGenCXX/visibility-inlines-hidden-staticvar.cpp new file mode 100644 index 000000000000..c0fa57d859c7 --- /dev/null +++ b/test/CodeGenCXX/visibility-inlines-hidden-staticvar.cpp @@ -0,0 +1,111 @@ +// RUN: %clang_cc1 -triple i386-unknown-unknown -std=c++11 -fvisibility-inlines-hidden -emit-llvm -o - %s -O2 -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-unknown -std=c++11 -emit-llvm -o - %s -O2 -disable-llvm-passes | FileCheck -check-prefixes=CHECK-NO-VIH %s +// RUN: %clang_cc1 -triple i386-unknown-unknown -std=c++11 -fvisibility hidden 
-fvisibility-inlines-hidden -emit-llvm -o - %s -O2 -disable-llvm-passes | FileCheck %s --check-prefix=CHECK-VIS-HIDDEN +// RUN: %clang_cc1 -triple i386-unknown-unknown -std=c++11 -fvisibility protected -fvisibility-inlines-hidden -emit-llvm -o - %s -O2 -disable-llvm-passes | FileCheck %s --check-prefix=CHECK-VIS-PROTECTED + +// When a function is hidden due to -fvisibility-inlines-hidden option, static local variables of the function should not be hidden by the option. + +// CHECK-DAG: @_ZZ4funcvE3var = internal global i32 0 +// CHECK-DAG: @_ZZ11hidden_funcvE3var = internal global i32 0 +// CHECK-DAG: @_ZZ12default_funcvE3var = internal global i32 0 +// CHECK-DAG: @_ZZ11inline_funcvE3var = linkonce_odr global i32 0, comdat +// CHECK-DAG: @_ZZ18inline_hidden_funcvE3var = linkonce_odr hidden global i32 0, comdat +// CHECK-DAG: @_ZZ19inline_default_funcvE3var = linkonce_odr global i32 0, comdat +// CHECK-DAG: @_ZZN13ExportedClass10inl_methodEvE3var = linkonce_odr global i32 0, comdat, align 4 +// CHECK-DAG: define i32 @_Z4funcv() +// CHECK-DAG: define hidden i32 @_Z11hidden_funcv() +// CHECK-DAG: define i32 @_Z12default_funcv() +// CHECK-DAG: define linkonce_odr hidden i32 @_Z11inline_funcv() +// CHECK-DAG: define linkonce_odr hidden i32 @_Z18inline_hidden_funcv() +// CHECK-DAG: define linkonce_odr i32 @_Z19inline_default_funcv() +// CHECK-DAG: define linkonce_odr hidden i32 @_ZN13ExportedClass10inl_methodEv({{.*}}) +// CHECK-DAG: define i32 @_ZN13ExportedClass10ext_methodEv({{.*}}) + +// CHECK-NO-VIH-DAG: @_ZZ4funcvE3var = internal global i32 0 +// CHECK-NO-VIH-DAG: @_ZZ11hidden_funcvE3var = internal global i32 0 +// CHECK-NO-VIH-DAG: @_ZZ12default_funcvE3var = internal global i32 0 +// CHECK-NO-VIH-DAG: @_ZZ11inline_funcvE3var = linkonce_odr global i32 0, comdat +// CHECK-NO-VIH-DAG: @_ZZ18inline_hidden_funcvE3var = linkonce_odr hidden global i32 0, comdat +// CHECK-NO-VIH-DAG: @_ZZ19inline_default_funcvE3var = linkonce_odr global i32 0, comdat +// CHECK-NO-VIH-DAG: 
@_ZZN13ExportedClass10inl_methodEvE3var = linkonce_odr global i32 0, comdat, align 4 +// CHECK-NO-VIH-DAG: define i32 @_Z4funcv() +// CHECK-NO-VIH-DAG: define hidden i32 @_Z11hidden_funcv() +// CHECK-NO-VIH-DAG: define i32 @_Z12default_funcv() +// CHECK-NO-VIH-DAG: define linkonce_odr i32 @_Z11inline_funcv() +// CHECK-NO-VIH-DAG: define linkonce_odr hidden i32 @_Z18inline_hidden_funcv() +// CHECK-NO-VIH-DAG: define linkonce_odr i32 @_Z19inline_default_funcv() +// CHECK-NO-VIH-DAG: define linkonce_odr i32 @_ZN13ExportedClass10inl_methodEv({{.*}}) +// CHECK-NO-VIH-DAG: define i32 @_ZN13ExportedClass10ext_methodEv({{.*}}) + +// CHECK-VIS-HIDDEN-DAG: @_ZZ4funcvE3var = internal global i32 0 +// CHECK-VIS-HIDDEN-DAG: @_ZZ11hidden_funcvE3var = internal global i32 0 +// CHECK-VIS-HIDDEN-DAG: @_ZZ12default_funcvE3var = internal global i32 0 +// CHECK-VIS-HIDDEN-DAG: @_ZZ11inline_funcvE3var = linkonce_odr hidden global i32 0, comdat +// CHECK-VIS-HIDDEN-DAG: @_ZZ18inline_hidden_funcvE3var = linkonce_odr hidden global i32 0, comdat +// CHECK-VIS-HIDDEN-DAG: @_ZZ19inline_default_funcvE3var = linkonce_odr global i32 0, comdat +// CHECK-VIS-HIDDEN-DAG: @_ZZN13ExportedClass10inl_methodEvE3var = linkonce_odr global i32 0, comdat, align 4 +// CHECK-VIS-HIDDEN-DAG: define hidden i32 @_Z4funcv() +// CHECK-VIS-HIDDEN-DAG: define hidden i32 @_Z11hidden_funcv() +// CHECK-VIS-HIDDEN-DAG: define i32 @_Z12default_funcv() +// CHECK-VIS-HIDDEN-DAG: define linkonce_odr hidden i32 @_Z11inline_funcv() +// CHECK-VIS-HIDDEN-DAG: define linkonce_odr hidden i32 @_Z18inline_hidden_funcv() +// CHECK-VIS-HIDDEN-DAG: define linkonce_odr i32 @_Z19inline_default_funcv() +// CHECK-VIS-HIDDEN-DAG: define linkonce_odr hidden i32 @_ZN13ExportedClass10inl_methodEv({{.*}}) +// CHECK-VIS-HIDDEN-DAG: define i32 @_ZN13ExportedClass10ext_methodEv({{.*}}) + +// CHECK-VIS-PROTECTED-DAG: @_ZZ4funcvE3var = internal global i32 0 +// CHECK-VIS-PROTECTED-DAG: @_ZZ11hidden_funcvE3var = internal global i32 0 +// 
CHECK-VIS-PROTECTED-DAG: @_ZZ12default_funcvE3var = internal global i32 0 +// CHECK-VIS-PROTECTED-DAG: @_ZZ11inline_funcvE3var = linkonce_odr protected global i32 0, comdat +// CHECK-VIS-PROTECTED-DAG: @_ZZ18inline_hidden_funcvE3var = linkonce_odr hidden global i32 0, comdat +// CHECK-VIS-PROTECTED-DAG: @_ZZ19inline_default_funcvE3var = linkonce_odr global i32 0, comdat +// CHECK-VIS-PROTECTED-DAG: @_ZZN13ExportedClass10inl_methodEvE3var = linkonce_odr global i32 0, comdat, align 4 +// CHECK-VIS-PROTECTED-DAG: define protected i32 @_Z4funcv() +// CHECK-VIS-PROTECTED-DAG: define hidden i32 @_Z11hidden_funcv() +// CHECK-VIS-PROTECTED-DAG: define i32 @_Z12default_funcv() +// CHECK-VIS-PROTECTED-DAG: define linkonce_odr hidden i32 @_Z11inline_funcv() +// CHECK-VIS-PROTECTED-DAG: define linkonce_odr hidden i32 @_Z18inline_hidden_funcv() +// CHECK-VIS-PROTECTED-DAG: define linkonce_odr i32 @_Z19inline_default_funcv() +// CHECK-VIS-PROTECTED-DAG: define linkonce_odr hidden i32 @_ZN13ExportedClass10inl_methodEv({{.*}}) +// CHECK-VIS-PROTECTED-DAG: define i32 @_ZN13ExportedClass10ext_methodEv({{.*}}) + +int func(void) { + static int var = 0; + return var++; +} +inline int inline_func(void) { + static int var = 0; + return var++; +} +int __attribute__((visibility("hidden"))) hidden_func(void) { + static int var = 0; + return var++; +} +inline int __attribute__((visibility("hidden"))) inline_hidden_func(void) { + static int var = 0; + return var++; +} +int __attribute__((visibility("default"))) default_func(void) { + static int var = 0; + return var++; +} +inline int __attribute__((visibility("default"))) inline_default_func(void) { + static int var = 0; + return var++; +} +struct __attribute__((visibility("default"))) ExportedClass { + int inl_method() { + static int var = 0; + return var++; + } + int ext_method(); +}; +int ExportedClass::ext_method() { return inl_method(); } +void bar(void) { + func(); + inline_func(); + hidden_func(); + inline_hidden_func(); + 
default_func(); + inline_default_func(); +} diff --git a/test/Driver/openbsd.c b/test/Driver/openbsd.c index 4aafa2f1f50c..16621b040759 100644 --- a/test/Driver/openbsd.c +++ b/test/Driver/openbsd.c @@ -12,7 +12,7 @@ // RUN: %clang -no-canonical-prefixes -target i686-pc-openbsd -pg -pthread %s -### 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-PG %s // CHECK-PG: clang{{.*}}" "-cc1" "-triple" "i686-pc-openbsd" -// CHECK-PG: ld{{.*}}" "-e" "__start" "--eh-frame-hdr" "-Bdynamic" "-dynamic-linker" "{{.*}}ld.so" "-o" "a.out" "{{.*}}gcrt0.o" "{{.*}}crtbegin.o" "{{.*}}.o" "-lgcc" "-lpthread_p" "-lc_p" "-lgcc" "{{.*}}crtend.o" +// CHECK-PG: ld{{.*}}" "-e" "__start" "--eh-frame-hdr" "-Bdynamic" "-dynamic-linker" "{{.*}}ld.so" "-nopie" "-o" "a.out" "{{.*}}gcrt0.o" "{{.*}}crtbegin.o" "{{.*}}.o" "-lgcc" "-lpthread_p" "-lc_p" "-lgcc" "{{.*}}crtend.o" // Check CPU type for MIPS64 // RUN: %clang -target mips64-unknown-openbsd -### -c %s 2>&1 \ From 22ef103ecf6cbc897202c1b70bf9a0edabfee146 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sat, 8 Dec 2018 14:32:10 +0000 Subject: [PATCH 6/6] Vendor import of libc++ release_70 branch r348686: https://llvm.org/svn/llvm-project/libcxx/branches/release_70@348686 --- cmake/Modules/HandleLibCXXABI.cmake | 2 +- src/filesystem/operations.cpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/cmake/Modules/HandleLibCXXABI.cmake b/cmake/Modules/HandleLibCXXABI.cmake index ef3b4f5dde22..1c19d7e01af7 100644 --- a/cmake/Modules/HandleLibCXXABI.cmake +++ b/cmake/Modules/HandleLibCXXABI.cmake @@ -41,7 +41,7 @@ macro(setup_abi_lib abidefines abilib abifiles abidirs) get_filename_component(ifile ${fpath} NAME) set(src ${incpath}/${fpath}) - set(dst ${LIBCXX_BINARY_INCLUDE_DIR}/${dstdir}/${fpath}) + set(dst ${LIBCXX_BINARY_INCLUDE_DIR}/${dstdir}/${ifile}) add_custom_command(OUTPUT ${dst} DEPENDS ${src} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src} ${dst} diff --git a/src/filesystem/operations.cpp 
b/src/filesystem/operations.cpp index 65a4b319339b..c9396b59cae8 100644 --- a/src/filesystem/operations.cpp +++ b/src/filesystem/operations.cpp @@ -352,7 +352,6 @@ struct FileDescriptor { ~FileDescriptor() { close(); } - FileDescriptor() = default; FileDescriptor(FileDescriptor const&) = delete; FileDescriptor& operator=(FileDescriptor const&) = delete;