Vendor import of llvm release_70 branch r348011:

https://llvm.org/svn/llvm-project/llvm/branches/release_70@348011
This commit is contained in:
Dimitry Andric 2018-12-01 15:41:24 +00:00
parent 86392292ee
commit d17fea9f41
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/vendor/llvm/dist-release_70/; revision=341365
svn path=/vendor/llvm/llvm-release_70-r348011/; revision=341366; tag=vendor/llvm/llvm-release_70-r348011
47 changed files with 2386 additions and 281 deletions

View File

@ -165,6 +165,11 @@ class MCAsmBackend {
return 0;
}
/// Check whether a given symbol has been flagged with MICROMIPS flag.
///
/// This default implementation reports false for every symbol; the hook is
/// virtual so that a target backend can supply its own answer.
///
/// \param Sym the symbol to query (unused by this default implementation).
/// \returns false.
virtual bool isMicroMips(const MCSymbol *Sym) const {
return false;
}
/// Handles all target related code padding when starting to write a new
/// basic block to an object file.
///

View File

@ -1186,6 +1186,20 @@ struct SemiNCAInfo {
<< '\t' << U << "\n");
LLVM_DEBUG(dbgs() << "\n");
// Recalculate the DominatorTree when the number of updates
// exceeds a threshold, which usually makes direct updating slower than
// recalculation. We select this threshold proportional to the
// size of the DominatorTree. The constant is selected
// by choosing the one with an acceptable performance on some real-world
// inputs.
// Make unittests of the incremental algorithm work
if (DT.DomTreeNodes.size() <= 100) {
if (NumLegalized > DT.DomTreeNodes.size())
CalculateFromScratch(DT, &BUI);
} else if (NumLegalized > DT.DomTreeNodes.size() / 40)
CalculateFromScratch(DT, &BUI);
// If the DominatorTree was recalculated at some point, stop the batch
// updates. Full recalculations ignore batch updates and look at the actual
// CFG.

View File

@ -76,6 +76,10 @@ class SSAUpdater {
/// block.
bool HasValueForBlock(BasicBlock *BB) const;
/// Return the value for the specified block if the SSAUpdater has one,
/// otherwise return nullptr.
Value *FindValueForBlock(BasicBlock *BB) const;
/// Construct SSA form, materializing a value that is live at the end
/// of the specified block.
Value *GetValueAtEndOfBlock(BasicBlock *BB);

View File

@ -357,10 +357,9 @@ class SSAUpdaterImpl {
BBInfo *Info = *I;
if (Info->DefBB != Info) {
// Record the available value at join nodes to speed up subsequent
// uses of this SSAUpdater for the same value.
if (Info->NumPreds > 1)
(*AvailableVals)[Info->BB] = Info->DefBB->AvailableVal;
// Record the available value to speed up subsequent uses of this
// SSAUpdater for the same value.
(*AvailableVals)[Info->BB] = Info->DefBB->AvailableVal;
continue;
}

View File

@ -1156,10 +1156,11 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
MCSymbol *Sym = TM.getSymbol(ComdatGV);
StringRef COMDATSymName = Sym->getName();
// Append "$symbol" to the section name when targetting mingw. The ld.bfd
// Append "$symbol" to the section name *before* IR-level mangling is
// applied when targetting mingw. This is what GCC does, and the ld.bfd
// COFF linker will not properly handle comdats otherwise.
if (getTargetTriple().isWindowsGNUEnvironment())
raw_svector_ostream(Name) << '$' << COMDATSymName;
raw_svector_ostream(Name) << '$' << ComdatGV->getName();
return getContext().getCOFFSection(Name, Characteristics, Kind,
COMDATSymName, Selection, UniqueID);

View File

@ -524,6 +524,11 @@ static void AttemptToFoldSymbolOffsetDifference(
if (Asm->isThumbFunc(&SA))
Addend |= 1;
// If symbol is labeled as micromips, we set low-bit to ensure
// correct offset in .gcc_except_table
if (Asm->getBackend().isMicroMips(&SA))
Addend |= 1;
// Clear the symbol expr pointers to indicate we have folded these
// operands.
A = B = nullptr;

View File

@ -25,6 +25,7 @@
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
@ -568,6 +569,14 @@ bool MipsAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
}
}
/// Report whether \p Sym is an ELF symbol whose "other" field has the
/// STO_MIPS_MICROMIPS bit set.
///
/// \param Sym the symbol to inspect.
/// \returns true only when \p Sym is an MCSymbolELF carrying the microMIPS
/// bit; any symbol that is not an MCSymbolELF yields false.
bool MipsAsmBackend::isMicroMips(const MCSymbol *Sym) const {
  const auto *ELFSymbol = dyn_cast<const MCSymbolELF>(Sym);
  // Symbols of any other kind cannot carry the ELF-specific flag.
  if (!ELFSymbol)
    return false;
  return (ELFSymbol->getOther() & ELF::STO_MIPS_MICROMIPS) != 0;
}
MCAsmBackend *llvm::createMipsAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,

View File

@ -25,6 +25,7 @@ class MCAssembler;
struct MCFixupKindInfo;
class MCObjectWriter;
class MCRegisterInfo;
class MCSymbolELF;
class Target;
class MipsAsmBackend : public MCAsmBackend {
@ -90,6 +91,7 @@ class MipsAsmBackend : public MCAsmBackend {
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target) override;
bool isMicroMips(const MCSymbol *Sym) const override;
}; // class MipsAsmBackend
} // namespace

View File

@ -15,6 +15,7 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSymbolELF.h"
@ -53,6 +54,22 @@ void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
createPendingLabelRelocs();
}
/// Start a CFI procedure: create a fresh temporary symbol, store it as the
/// frame's Begin label and emit it.
///
/// The label is emitted through MCELFStreamer::EmitLabel, i.e. the base-class
/// implementation is named explicitly rather than dispatching virtually.
///
/// \param Frame the frame record whose Begin field is filled in.
void MipsELFStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
  MCSymbol *StartSym = getContext().createTempSymbol();
  Frame.Begin = StartSym;
  MCELFStreamer::EmitLabel(StartSym);
}
/// Create a temporary symbol with the "cfi" prefix, emit it via
/// MCELFStreamer::EmitLabel (the base-class implementation, named
/// explicitly) and hand it back to the caller.
///
/// \returns the newly created and emitted symbol.
MCSymbol *MipsELFStreamer::EmitCFILabel() {
  MCSymbol *const CFISym = getContext().createTempSymbol("cfi", true);
  MCELFStreamer::EmitLabel(CFISym);
  return CFISym;
}
/// Finish a CFI procedure: create a fresh temporary symbol, store it as the
/// frame's End label and emit it.
///
/// The label is emitted through MCELFStreamer::EmitLabel, i.e. the base-class
/// implementation is named explicitly rather than dispatching virtually.
///
/// \param Frame the frame record whose End field is filled in.
void MipsELFStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
  MCSymbol *EndSym = getContext().createTempSymbol();
  Frame.End = EndSym;
  MCELFStreamer::EmitLabel(EndSym);
}
void MipsELFStreamer::createPendingLabelRelocs() {
MipsTargetELFStreamer *ELFTargetStreamer =
static_cast<MipsTargetELFStreamer *>(getTargetStreamer());

View File

@ -26,6 +26,7 @@ class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCSubtargetInfo;
struct MCDwarfFrameInfo;
class MipsELFStreamer : public MCELFStreamer {
SmallVector<std::unique_ptr<MipsOptionRecord>, 8> MipsOptionRecords;
@ -60,6 +61,12 @@ class MipsELFStreamer : public MCELFStreamer {
void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override;
void EmitIntValue(uint64_t Value, unsigned Size) override;
// Overriding these functions allows us to avoid recording of these labels
// in EmitLabel and later marking them as microMIPS.
void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override;
void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override;
MCSymbol *EmitCFILabel() override;
/// Emits all the option records stored up until the point it's called.
void EmitMipsOptionRecords();

View File

@ -1733,7 +1733,7 @@ defm S_MMR6 : Cmp_Pats<f32, NOR_MMR6, ZERO>, ISA_MICROMIPS32R6;
defm D_MMR6 : Cmp_Pats<f64, NOR_MMR6, ZERO>, ISA_MICROMIPS32R6;
def : MipsPat<(f32 fpimm0), (MTC1_MMR6 ZERO)>, ISA_MICROMIPS32R6;
def : MipsPat<(f32 fpimm0neg), (FNEG_S_MMR6 (MTC1 ZERO))>, ISA_MICROMIPS32R6;
def : MipsPat<(f32 fpimm0neg), (FNEG_S_MMR6 (MTC1_MMR6 ZERO))>, ISA_MICROMIPS32R6;
def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
(TRUNC_W_D_MMR6 FGR64Opnd:$src)>, ISA_MICROMIPS32R6;

View File

@ -838,7 +838,7 @@ def : MipsPat<(i64 (sext (i32 (sub GPR32:$src, GPR32:$src2)))),
(SUBu GPR32:$src, GPR32:$src2), sub_32)>;
def : MipsPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)),
(MUL GPR32:$src, GPR32:$src2), sub_32)>;
(MUL GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS3_NOT_32R6_64R6;
def : MipsPat<(i64 (sext (i32 (MipsMFHI ACC64:$src)))),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)),
(PseudoMFHI ACC64:$src), sub_32)>;
@ -1139,3 +1139,6 @@ def SLTUImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rs),
"sltu\t$rs, $rt, $imm">, GPR_64;
def : MipsInstAlias<"sltu\t$rs, $imm", (SLTUImm64 GPR64Opnd:$rs, GPR64Opnd:$rs,
imm64:$imm)>, GPR_64;
def : MipsInstAlias<"rdhwr $rt, $rs",
(RDHWR64 GPR64Opnd:$rt, HWRegsOpnd:$rs, 0), 1>, GPR_64;

View File

@ -301,6 +301,9 @@ def : MipsPat<(select (i32 (seteq i32:$cond, immz)), immz, i64:$f),
// Patterns used for matching away redundant sign extensions.
// MIPS32 arithmetic instructions sign extend their result implicitly.
def : MipsPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)),
(MUL_R6 GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS64R6;
def : MipsPat<(i64 (sext (i32 (sdiv GPR32:$src, GPR32:$src2)))),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)),
(DIV GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS64R6;

View File

@ -953,6 +953,11 @@ bool MipsFastISel::selectBranch(const Instruction *I) {
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
// For now, just try the simplest case where it's fed by a compare.
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
MVT CIMVT =
TLI.getValueType(DL, CI->getOperand(0)->getType(), true).getSimpleVT();
if (CIMVT == MVT::i1)
return false;
unsigned CondReg = getRegForValue(CI);
BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ))
.addReg(CondReg)

View File

@ -485,14 +485,14 @@ let AdditionalPredicates = [NotInMicroMips] in {
def CTC1 : MMRel, MTC1_FT<"ctc1", CCROpnd, GPR32Opnd, II_CTC1>, MFC1_FM<6>,
ISA_MIPS1;
def MFC1 : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, II_MFC1,
bitconvert>, MFC1_FM<0>, ISA_MIPS1;
def MFC1 : MMRel, StdMMR6Rel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, II_MFC1,
bitconvert>, MFC1_FM<0>, ISA_MIPS1;
def MFC1_D64 : MFC1_FT<"mfc1", GPR32Opnd, FGR64Opnd, II_MFC1>, MFC1_FM<0>,
ISA_MIPS1, FGR_64 {
let DecoderNamespace = "MipsFP64";
}
def MTC1 : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1,
bitconvert>, MFC1_FM<4>, ISA_MIPS1;
def MTC1 : MMRel, StdMMR6Rel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1,
bitconvert>, MFC1_FM<4>, ISA_MIPS1;
def MTC1_D64 : MTC1_FT<"mtc1", FGR64Opnd, GPR32Opnd, II_MTC1>, MFC1_FM<4>,
ISA_MIPS1, FGR_64 {
let DecoderNamespace = "MipsFP64";

View File

@ -299,8 +299,12 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB,
// register). Unfortunately, we have to make this decision before register
// allocation so for now we use a spill/reload sequence for all
// double-precision values in regardless of being an odd/even register.
if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) ||
(FP64 && !Subtarget.useOddSPReg())) {
//
// For the cases that should be covered here MipsSEISelDAGToDAG adds $sp as
// implicit operand, so other passes (like ShrinkWrapping) are aware that
// stack is used.
if (I->getNumOperands() == 4 && I->getOperand(3).isReg()
&& I->getOperand(3).getReg() == Mips::SP) {
unsigned DstReg = I->getOperand(0).getReg();
unsigned LoReg = I->getOperand(1).getReg();
unsigned HiReg = I->getOperand(2).getReg();
@ -360,9 +364,12 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
// register). Unfortunately, we have to make this decision before register
// allocation so for now we use a spill/reload sequence for all
// double-precision values in regardless of being an odd/even register.
if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) ||
(FP64 && !Subtarget.useOddSPReg())) {
//
// For the cases that should be covered here MipsSEISelDAGToDAG adds $sp as
// implicit operand, so other passes (like ShrinkWrapping) are aware that
// stack is used.
if (I->getNumOperands() == 4 && I->getOperand(3).isReg()
&& I->getOperand(3).getReg() == Mips::SP) {
unsigned DstReg = I->getOperand(0).getReg();
unsigned SrcReg = Op1.getReg();
unsigned N = Op2.getImm();

View File

@ -238,6 +238,18 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
case Mips::WRDSP:
addDSPCtrlRegOperands(true, MI, MF);
break;
case Mips::BuildPairF64_64:
case Mips::ExtractElementF64_64:
if (!Subtarget->useOddSPReg()) {
MI.addOperand(MachineOperand::CreateReg(Mips::SP, false, true));
break;
}
// fallthrough
case Mips::BuildPairF64:
case Mips::ExtractElementF64:
if (Subtarget->isABI_FPXX() && !Subtarget->hasMTHC1())
MI.addOperand(MachineOperand::CreateReg(Mips::SP, false, true));
break;
default:
replaceUsesWithZeroReg(MRI, MI);
}

View File

@ -25,9 +25,14 @@
using namespace llvm;
/// Pick the unconditional branch opcode for the given subtarget.
///
/// Position-independent code uses the B form and other code uses the J form;
/// in microMIPS mode the corresponding _MM opcode is chosen.
///
/// \param STI the subtarget that is queried for microMIPS mode and PIC.
/// \returns one of Mips::B_MM, Mips::J_MM, Mips::B or Mips::J.
static unsigned getUnconditionalBranch(const MipsSubtarget &STI) {
  const bool MicroMips = STI.inMicroMipsMode();
  const bool PIC = STI.isPositionIndependent();
  if (PIC)
    return MicroMips ? Mips::B_MM : Mips::B;
  return MicroMips ? Mips::J_MM : Mips::J;
}
MipsSEInstrInfo::MipsSEInstrInfo(const MipsSubtarget &STI)
: MipsInstrInfo(STI, STI.isPositionIndependent() ? Mips::B : Mips::J),
RI() {}
: MipsInstrInfo(STI, getUnconditionalBranch(STI)), RI() {}
const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const {
return RI;
@ -643,7 +648,7 @@ unsigned MipsSEInstrInfo::getAnalyzableBrOpc(unsigned Opc) const {
Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || Opc == Mips::BGEZ64 ||
Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 || Opc == Mips::BC1T ||
Opc == Mips::BC1F || Opc == Mips::B || Opc == Mips::J ||
Opc == Mips::B_MM || Opc == Mips::BEQZC_MM ||
Opc == Mips::J_MM || Opc == Mips::B_MM || Opc == Mips::BEQZC_MM ||
Opc == Mips::BNEZC_MM || Opc == Mips::BEQC || Opc == Mips::BNEC ||
Opc == Mips::BLTC || Opc == Mips::BGEC || Opc == Mips::BLTUC ||
Opc == Mips::BGEUC || Opc == Mips::BGTZC || Opc == Mips::BLEZC ||

View File

@ -592,6 +592,7 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
XXPERM,
XXPERMR,
XXSLDWI,
XXSLDWIs,
XXSPLTIB,
XXSPLTW,
XXSPLTWs,

View File

@ -8454,17 +8454,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
// If the source for the shuffle is a scalar_to_vector that came from a
// 32-bit load, it will have used LXVWSX so we don't need to splat again.
if (Subtarget.hasP9Vector() &&
((isLittleEndian && SplatIdx == 3) ||
(!isLittleEndian && SplatIdx == 0))) {
SDValue Src = V1.getOperand(0);
if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
Src.getOperand(0).getOpcode() == ISD::LOAD &&
Src.getOperand(0).hasOneUse())
return V1;
}
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
DAG.getConstant(SplatIdx, dl, MVT::i32));

View File

@ -877,6 +877,12 @@ let Uses = [RM] in {
"xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm,
[(set v4i32:$XT, (PPCvecshl v4i32:$XA, v4i32:$XB,
imm32SExt16:$SHW))]>;
let isCodeGenOnly = 1 in
def XXSLDWIs : XX3Form_2s<60, 2,
(outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$SHW),
"xxsldwi $XT, $XA, $XA, $SHW", IIC_VecPerm, []>;
def XXSPLTW : XX2Form_2<60, 164,
(outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
"xxspltw $XT, $XB, $UIM", IIC_VecPerm,
@ -886,6 +892,7 @@ let Uses = [RM] in {
def XXSPLTWs : XX2Form_2<60, 164,
(outs vsrc:$XT), (ins vfrc:$XB, u2imm:$UIM),
"xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
} // hasSideEffects
} // UseVSXReg = 1
@ -1466,8 +1473,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
(f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
(f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
}
def : Pat<(v4i32 (scalar_to_vector ScalarLoads.Li32)),
(v4i32 (XXSPLTWs (LIWAX xoaddr:$src), 1))>;
// Instructions for converting float to i64 feeding a store.
let Predicates = [NoP9Vector] in {
@ -3050,13 +3055,47 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(STXVX $rS, xoaddr:$dst)>;
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
(STXVX $rS, xoaddr:$dst)>;
def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
(v4i32 (LXVWSX xoaddr:$src))>;
def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
(v4f32 (LXVWSX xoaddr:$src))>;
def : Pat<(v4f32 (scalar_to_vector
(f32 (fpround (f64 (extloadf32 xoaddr:$src)))))),
(v4f32 (LXVWSX xoaddr:$src))>;
let AddedComplexity = 400 in {
// LIWAX - This instruction is used for sign extending i32 -> i64.
// LIWZX - This instruction will be emitted for i32, f32, and when
// zero-extending i32 to i64 (zext i32 -> i64).
let Predicates = [IsLittleEndian] in {
def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
(v2i64 (XXPERMDIs
(COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC), 2))>;
def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
(v2i64 (XXPERMDIs
(COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
(v4i32 (XXPERMDIs
(COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
(v4f32 (XXPERMDIs
(COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
}
let Predicates = [IsBigEndian] in {
def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
(v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>;
def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
(v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>;
def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
(v4i32 (XXSLDWIs
(COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
(v4f32 (XXSLDWIs
(COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
}
}
// Build vectors from i8 loads
def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)),
@ -3218,6 +3257,39 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))),
(f32 (DFLOADf32 ixaddr:$src))>;
let AddedComplexity = 400 in {
// The following pseudoinstructions are used to ensure the utilization
// of all 64 VSX registers.
let Predicates = [IsLittleEndian, HasP9Vector] in {
def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
(v2i64 (XXPERMDIs
(COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>;
def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))),
(v2i64 (XXPERMDIs
(COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))),
(v2f64 (XXPERMDIs
(COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>;
def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
(v2f64 (XXPERMDIs
(COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
}
let Predicates = [IsBigEndian, HasP9Vector] in {
def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
(v2i64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))),
(v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))),
(v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
(v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
}
}
let Predicates = [IsBigEndian, HasP9Vector] in {
// (Un)Signed DWord vector extract -> QP
@ -3932,3 +4004,4 @@ let AddedComplexity = 400 in {
(v4i32 (VEXTSH2W $A))>;
}
}

View File

@ -41,6 +41,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils.h"
@ -201,6 +202,21 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
SSAUpdate.RewriteUse(*UseToRewrite);
}
SmallVector<DbgValueInst *, 4> DbgValues;
llvm::findDbgValues(DbgValues, I);
// Update pre-existing debug value uses that reside outside the loop.
auto &Ctx = I->getContext();
for (auto DVI : DbgValues) {
BasicBlock *UserBB = DVI->getParent();
if (InstBB == UserBB || L->contains(UserBB))
continue;
// We currently only handle debug values residing in blocks where we have
// inserted a PHI instruction.
if (Value *V = SSAUpdate.FindValueForBlock(UserBB))
DVI->setOperand(0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(V)));
}
// SSAUpdater might have inserted phi-nodes inside other loops. We'll need
// to post-process them to keep LCSSA form.
for (PHINode *InsertedPN : InsertedPHIs) {

View File

@ -64,6 +64,11 @@ bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
return getAvailableVals(AV).count(BB);
}
/// Look up the value recorded for \p BB in the available-values map.
///
/// \param BB the block to look up.
/// \returns the value stored for \p BB, or nullptr when the map has no entry
/// for that block.
Value *SSAUpdater::FindValueForBlock(BasicBlock *BB) const {
  AvailableValsTy &Vals = getAvailableVals(AV);
  AvailableValsTy::iterator Pos = Vals.find(BB);
  if (Pos == Vals.end())
    return nullptr;
  return Pos->second;
}
void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
assert(ProtoType && "Need to initialize SSAUpdater");
assert(ProtoType == V->getType() &&

View File

@ -0,0 +1,189 @@
; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel=true -mcpu=mips32r2 \
; RUN: < %s -verify-machineinstrs | FileCheck %s
; Branches on `icmp eq` of two i1 values truncated from the i32 arguments.
; The expected output masks each operand with `andi ..., 1` and compares the
; two masked registers directly with `beq`.
define void @testeq(i32, i32) {
; CHECK-LABEL: testeq:
; CHECK: andi $[[REG0:[0-9]+]], $4, 1
; CHECK: andi $[[REG1:[0-9]+]], $5, 1
; CHECK: beq $[[REG0]], $[[REG1]],
%3 = trunc i32 %0 to i1
%4 = trunc i32 %1 to i1
%5 = icmp eq i1 %3, %4
br i1 %5, label %end, label %trap
trap:
call void @llvm.trap()
br label %end
end:
ret void
}
; Branches on `icmp ne` of two i1 values truncated from the i32 arguments.
; The expected output masks each operand with `andi ..., 1` and compares the
; two masked registers directly with `bne`.
define void @testne(i32, i32) {
; CHECK-LABEL: testne:
; CHECK: andi $[[REG0:[0-9]+]], $4, 1
; CHECK: andi $[[REG1:[0-9]+]], $5, 1
; CHECK: bne $[[REG0]], $[[REG1]],
%3 = trunc i32 %0 to i1
%4 = trunc i32 %1 to i1
%5 = icmp ne i1 %3, %4
br i1 %5, label %end, label %trap
trap:
call void @llvm.trap()
br label %end
end:
ret void
}
; Branches on `icmp ugt` of two i1 values truncated from the i32 arguments.
; The expected output masks each operand with `andi ..., 1`, computes
; `sltu` with the operands swapped (second < first) and branches with `bnez`.
define void @testugt(i32, i32) {
; CHECK-LABEL: testugt:
; CHECK: andi $[[REG0:[0-9]+]], $4, 1
; CHECK: andi $[[REG1:[0-9]+]], $5, 1
; CHECK: sltu $[[REG2:[0-9]+]], $[[REG1]], $[[REG0]]
; CHECK: bnez $[[REG2]],
%3 = trunc i32 %0 to i1
%4 = trunc i32 %1 to i1
%5 = icmp ugt i1 %3, %4
br i1 %5, label %end, label %trap
trap:
call void @llvm.trap()
br label %end
end:
ret void
}
; Branches on `icmp uge` of two i1 values truncated from the i32 arguments.
; The expected output masks each operand with `andi ..., 1`, computes
; `sltu` (first < second) and branches with `beqz`, i.e. on "not less than".
define void @testuge(i32, i32) {
; CHECK-LABEL: testuge:
; CHECK: andi $[[REG0:[0-9]+]], $4, 1
; CHECK: andi $[[REG1:[0-9]+]], $5, 1
; CHECK: sltu $[[REG2:[0-9]+]], $[[REG0]], $[[REG1]]
; CHECK: beqz $[[REG2]],
%3 = trunc i32 %0 to i1
%4 = trunc i32 %1 to i1
%5 = icmp uge i1 %3, %4
br i1 %5, label %end, label %trap
trap:
call void @llvm.trap()
br label %end
end:
ret void
}
; Branches on `icmp ult` of two i1 values truncated from the i32 arguments.
; The expected output masks each operand with `andi ..., 1`, computes
; `sltu` (first < second) and branches with `bnez`.
define void @testult(i32, i32) {
; CHECK-LABEL: testult:
; CHECK: andi $[[REG0:[0-9]+]], $4, 1
; CHECK: andi $[[REG1:[0-9]+]], $5, 1
; CHECK: sltu $[[REG2:[0-9]+]], $[[REG0]], $[[REG1]]
; CHECK: bnez $[[REG2]],
%3 = trunc i32 %0 to i1
%4 = trunc i32 %1 to i1
%5 = icmp ult i1 %3, %4
br i1 %5, label %end, label %trap
trap:
call void @llvm.trap()
br label %end
end:
ret void
}
; Branches on `icmp ule` of two i1 values truncated from the i32 arguments.
; The expected output masks each operand with `andi ..., 1`, computes
; `sltu` with the operands swapped (second < first) and branches with `beqz`.
; The label line anchors the patterns below to this function's own output,
; as the other test functions in this file already do.
define void @testule(i32, i32) {
; CHECK-LABEL: testule:
; CHECK: andi $[[REG0:[0-9]+]], $4, 1
; CHECK: andi $[[REG1:[0-9]+]], $5, 1
; CHECK: sltu $[[REG2:[0-9]+]], $[[REG1]], $[[REG0]]
; CHECK: beqz $[[REG2]],
%3 = trunc i32 %0 to i1
%4 = trunc i32 %1 to i1
%5 = icmp ule i1 %3, %4
br i1 %5, label %end, label %trap
trap:
call void @llvm.trap()
br label %end
end:
ret void
}
; Branches on `icmp sgt` of two i1 values truncated from the i32 arguments.
; The expected output masks each operand with `andi ..., 1` and then applies
; `negu` to it before the signed compare; `slt` is computed with the operands
; swapped (second < first) and the branch is `bnez`.
define void @testsgt(i32, i32) {
; CHECK-LABEL: testsgt:
; CHECK: andi $[[REG0:[0-9]+]], $4, 1
; CHECK: negu $[[REG0]], $[[REG0]]
; CHECK: andi $[[REG1:[0-9]+]], $5, 1
; CHECK: negu $[[REG1]], $[[REG1]]
; CHECK: slt $[[REG2:[0-9]+]], $[[REG1]], $[[REG0]]
; CHECK: bnez $[[REG2]],
%3 = trunc i32 %0 to i1
%4 = trunc i32 %1 to i1
%5 = icmp sgt i1 %3, %4
br i1 %5, label %end, label %trap
trap:
call void @llvm.trap()
br label %end
end:
ret void
}
; Branches on `icmp sge` of two i1 values truncated from the i32 arguments.
; The expected output masks each operand with `andi ..., 1` and then applies
; `negu` to it before the signed compare; `slt` is computed as
; (first < second) and the branch is `beqz`, i.e. on "not less than".
define void @testsge(i32, i32) {
; CHECK-LABEL: testsge:
; CHECK: andi $[[REG0:[0-9]+]], $4, 1
; CHECK: negu $[[REG0]], $[[REG0]]
; CHECK: andi $[[REG1:[0-9]+]], $5, 1
; CHECK: negu $[[REG1]], $[[REG1]]
; CHECK: slt $[[REG2:[0-9]+]], $[[REG0]], $[[REG1]]
; CHECK: beqz $[[REG2]],
%3 = trunc i32 %0 to i1
%4 = trunc i32 %1 to i1
%5 = icmp sge i1 %3, %4
br i1 %5, label %end, label %trap
trap:
call void @llvm.trap()
br label %end
end:
ret void
}
; Branches on `icmp slt` of two i1 values truncated from the i32 arguments.
; The expected output masks each operand with `andi ..., 1` and then applies
; `negu` to it before the signed compare; `slt` is computed as
; (first < second) and the branch is `bnez`.
define void @testslt(i32, i32) {
; CHECK-LABEL: testslt:
; CHECK: andi $[[REG0:[0-9]+]], $4, 1
; CHECK: negu $[[REG0]], $[[REG0]]
; CHECK: andi $[[REG1:[0-9]+]], $5, 1
; CHECK: negu $[[REG1]], $[[REG1]]
; CHECK: slt $[[REG2:[0-9]+]], $[[REG0]], $[[REG1]]
; CHECK: bnez $[[REG2]],
%3 = trunc i32 %0 to i1
%4 = trunc i32 %1 to i1
%5 = icmp slt i1 %3, %4
br i1 %5, label %end, label %trap
trap:
call void @llvm.trap()
br label %end
end:
ret void
}
; Branches on `icmp sle` of two i1 values truncated from the i32 arguments.
; The expected output masks each operand with `andi ..., 1` and then applies
; `negu` to it before the signed compare; `slt` is computed with the operands
; swapped (second < first) and the branch is `beqz`.
define void @testsle(i32, i32) {
; CHECK-LABEL: testsle:
; CHECK: andi $[[REG0:[0-9]+]], $4, 1
; CHECK: negu $[[REG0]], $[[REG0]]
; CHECK: andi $[[REG1:[0-9]+]], $5, 1
; CHECK: negu $[[REG1]], $[[REG1]]
; CHECK: slt $[[REG2:[0-9]+]], $[[REG1]], $[[REG0]]
; CHECK: beqz $[[REG2]],
%3 = trunc i32 %0 to i1
%4 = trunc i32 %1 to i1
%5 = icmp sle i1 %3, %4
br i1 %5, label %end, label %trap
trap:
call void @llvm.trap()
br label %end
end:
ret void
}
declare void @llvm.trap()

View File

@ -0,0 +1,32 @@
; RUN: llc -o - %s -mtriple=mips-unknown-linux-gnu \
; RUN: -mcpu=mips32 -mattr=+fpxx \
; RUN: -stop-after=expand-isel-pseudos | \
; RUN: FileCheck %s -check-prefix=FPXX-IMPLICIT-SP
; RUN: llc -o - %s -mtriple=mips-unknown-linux-gnu \
; RUN: -mcpu=mips32r6 -mattr=+fp64,+nooddspreg \
; RUN: -stop-after=expand-isel-pseudos | \
; RUN: FileCheck %s -check-prefix=FP64-IMPLICIT-SP
; RUN: llc -o - %s -mtriple=mips-unknown-linux-gnu \
; RUN: -mcpu=mips32r2 -mattr=+fpxx \
; RUN: -stop-after=expand-isel-pseudos | \
; RUN: FileCheck %s -check-prefix=NO-IMPLICIT-SP
; Truncates %d1 to float, takes copysign(1.0, that float) and extends the
; result back to double. The patterns below look at the BuildPairF64 /
; ExtractElementF64 pseudo instructions (or their _64 variants) produced for
; this body: the two *-IMPLICIT-SP prefixes require an `implicit $sp`
; operand on them, while the NO-IMPLICIT-SP prefix requires that it is absent.
define double @foo2(i32 signext %v1, double %d1) {
entry:
; FPXX-IMPLICIT-SP: BuildPairF64 %{{[0-9]+}}, %{{[0-9]+}}, implicit $sp
; FPXX-IMPLICIT-SP: ExtractElementF64 killed %{{[0-9]+}}, 1, implicit $sp
; FP64-IMPLICIT-SP: BuildPairF64_64 %{{[0-9]+}}, %{{[0-9]+}}, implicit $sp
; FP64-IMPLICIT-SP: ExtractElementF64_64 killed %{{[0-9]+}}, 1, implicit $sp
; NO-IMPLICIT-SP: BuildPairF64 %{{[0-9]+}}, %{{[0-9]+}}
; NO-IMPLICIT-SP-NOT: BuildPairF64 %{{[0-9]+}}, %{{[0-9]+}}, implicit $sp
; NO-IMPLICIT-SP: ExtractElementF64 killed %{{[0-9]+}}, 1
; NO-IMPLICIT-SP-NOT: ExtractElementF64 killed %{{[0-9]+}}, 1, implicit $sp
%conv = fptrunc double %d1 to float
%0 = tail call float @llvm.copysign.f32(float 1.000000e+00, float %conv)
%conv1 = fpext float %0 to double
ret double %conv1
}
declare float @llvm.copysign.f32(float, float)

View File

@ -231,16 +231,13 @@ define void @test1(i32 signext %s) {
; MICROMIPSSTATIC: # %bb.0: # %entry
; MICROMIPSSTATIC-NEXT: bnezc $4, $BB0_2
; MICROMIPSSTATIC-NEXT: # %bb.1: # %entry
; MICROMIPSSTATIC-NEXT: j $BB0_4
; MICROMIPSSTATIC-NEXT: nop
; MICROMIPSSTATIC-NEXT: $BB0_2: # %entry
; MICROMIPSSTATIC-NEXT: j $BB0_3
; MICROMIPSSTATIC-NEXT: nop
; MICROMIPSSTATIC-NEXT: $BB0_3: # %then
; MICROMIPSSTATIC-NEXT: $BB0_2: # %then
; MICROMIPSSTATIC-NEXT: lui $1, %hi(x)
; MICROMIPSSTATIC-NEXT: li16 $2, 1
; MICROMIPSSTATIC-NEXT: sw $2, %lo(x)($1)
; MICROMIPSSTATIC-NEXT: $BB0_4: # %end
; MICROMIPSSTATIC-NEXT: $BB0_3: # %end
; MICROMIPSSTATIC-NEXT: jrc $ra
;
; MICROMIPSR6STATIC-LABEL: test1:

View File

@ -0,0 +1,98 @@
; RUN: llc -march=mips -relocation-model=pic -mattr=+micromips \
; RUN: -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
; CHECK-LABEL: foo:
; CHECK-NEXT: 0: 41 a2 00 00 lui $2, 0
; CHECK-NEXT: 4: 30 42 00 00 addiu $2, $2, 0
; CHECK-NEXT: 8: 03 22 11 50 addu $2, $2, $25
; CHECK-NEXT: c: fc 42 00 00 lw $2, 0($2)
; CHECK-NEXT: 10: 69 20 lw16 $2, 0($2)
; CHECK-NEXT: 12: 40 c2 00 14 bgtz $2, 44 <foo+0x3e>
; CHECK-NEXT: 16: 00 00 00 00 nop
; CHECK-NEXT: 1a: 33 bd ff f8 addiu $sp, $sp, -8
; CHECK-NEXT: 1e: fb fd 00 00 sw $ra, 0($sp)
; CHECK-NEXT: 22: 41 a1 00 01 lui $1, 1
; CHECK-NEXT: 26: 40 60 00 02 bal 8 <foo+0x2e>
; CHECK-NEXT: 2a: 30 21 04 68 addiu $1, $1, 1128
; CHECK-NEXT: 2e: 00 3f 09 50 addu $1, $ra, $1
; CHECK-NEXT: 32: ff fd 00 00 lw $ra, 0($sp)
; CHECK-NEXT: 36: 00 01 0f 3c jr $1
; CHECK-NEXT: 3a: 33 bd 00 08 addiu $sp, $sp, 8
; CHECK-NEXT: 3e: 94 00 00 02 b 8 <foo+0x46>
; CHECK-NEXT: 42: 00 00 00 00 nop
; CHECK-NEXT: 46: 30 20 4e 1f addiu $1, $zero, 19999
; CHECK-NEXT: 4a: b4 22 00 14 bne $2, $1, 44 <foo+0x76>
; CHECK-NEXT: 4e: 00 00 00 00 nop
; CHECK-NEXT: 52: 33 bd ff f8 addiu $sp, $sp, -8
; CHECK-NEXT: 56: fb fd 00 00 sw $ra, 0($sp)
; CHECK-NEXT: 5a: 41 a1 00 01 lui $1, 1
; CHECK-NEXT: 5e: 40 60 00 02 bal 8 <foo+0x66>
; CHECK-NEXT: 62: 30 21 04 5c addiu $1, $1, 1116
; CHECK-NEXT: 66: 00 3f 09 50 addu $1, $ra, $1
; CHECK-NEXT: 6a: ff fd 00 00 lw $ra, 0($sp)
; CHECK-NEXT: 6e: 00 01 0f 3c jr $1
; CHECK-NEXT: 72: 33 bd 00 08 addiu $sp, $sp, 8
; CHECK-NEXT: 76: 30 20 27 0f addiu $1, $zero, 9999
; CHECK-NEXT: 7a: 94 22 00 14 beq $2, $1, 44 <foo+0xa6>
; CHECK-NEXT: 7e: 00 00 00 00 nop
; CHECK-NEXT: 82: 33 bd ff f8 addiu $sp, $sp, -8
; CHECK-NEXT: 86: fb fd 00 00 sw $ra, 0($sp)
; CHECK-NEXT: 8a: 41 a1 00 01 lui $1, 1
; CHECK-NEXT: 8e: 40 60 00 02 bal 8 <foo+0x96>
; CHECK-NEXT: 92: 30 21 04 2c addiu $1, $1, 1068
; CHECK-NEXT: 96: 00 3f 09 50 addu $1, $ra, $1
; CHECK-NEXT: 9a: ff fd 00 00 lw $ra, 0($sp)
; CHECK-NEXT: 9e: 00 01 0f 3c jr $1
; CHECK-NEXT: a2: 33 bd 00 08 addiu $sp, $sp, 8
; CHECK: 10466: 00 00 00 00 nop
; CHECK-NEXT: 1046a: 94 00 00 02 b 8 <foo+0x10472>
; CHECK-NEXT: 1046e: 00 00 00 00 nop
; CHECK-NEXT: 10472: 33 bd ff f8 addiu $sp, $sp, -8
; CHECK-NEXT: 10476: fb fd 00 00 sw $ra, 0($sp)
; CHECK-NEXT: 1047a: 41 a1 00 01 lui $1, 1
; CHECK-NEXT: 1047e: 40 60 00 02 bal 8 <foo+0x10486>
; CHECK-NEXT: 10482: 30 21 04 00 addiu $1, $1, 1024
; CHECK-NEXT: 10486: 00 3f 09 50 addu $1, $ra, $1
; CHECK-NEXT: 1048a: ff fd 00 00 lw $ra, 0($sp)
; CHECK-NEXT: 1048e: 00 01 0f 3c jr $1
; CHECK-NEXT: 10492: 33 bd 00 08 addiu $sp, $sp, 8
; CHECK-NEXT: 10496: 94 00 00 02 b 8 <foo+0x1049e>
@x = external global i32, align 4
; Loads @x, branches on whether it is greater than zero, and on the taken
; path switches on the values 9999 and 19999. Every block contains an inline
; asm `.space` directive; only block %le reserves a non-zero amount (66500
; bytes).
; NOTE(review): the large `.space` presumably pushes the branch targets out
; of short-branch range so that long-branch sequences are emitted -- confirm
; against the expected disassembly of this test.
define void @foo() {
%1 = load i32, i32* @x, align 4
%2 = icmp sgt i32 %1, 0
br i1 %2, label %la, label %lf
la:
switch i32 %1, label %le [
i32 9999, label %lb
i32 19999, label %lc
]
lb:
tail call void asm sideeffect ".space 0", ""()
br label %le
lc:
tail call void asm sideeffect ".space 0", ""()
br label %le
le:
tail call void asm sideeffect ".space 66500", ""()
br label %lg
lf:
tail call void asm sideeffect ".space 0", ""()
br label %lg
lg:
tail call void asm sideeffect ".space 0", ""()
br label %li
li:
tail call void asm sideeffect ".space 0", ""()
ret void
}

View File

@ -0,0 +1,37 @@
; RUN: llc -mtriple=mips-linux-gnu -mcpu=mips32r2 -mattr=+micromips -O3 -filetype=obj < %s | llvm-objdump -s -j .gcc_except_table - | FileCheck %s
; CHECK: Contents of section .gcc_except_table:
; CHECK-NEXT: 0000 ff9b1501 0c011100 00110e1f 011f1800
; CHECK-NEXT: 0010 00010000 00000000
@_ZTIi = external constant i8*
; Allocates a 4-byte exception object, stores the i32 value 5 into it and
; throws it through an `invoke` of @__cxa_throw whose unwind destination is
; %return. The landing pad there has a catch-all clause (`catch i8* null`);
; it begins and ends the catch and returns 0. The function uses
; @__gxx_personality_v0 as its personality routine.
define dso_local i32 @main() local_unnamed_addr norecurse personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry:
%exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind
%0 = bitcast i8* %exception.i to i32*
store i32 5, i32* %0, align 16
invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (i8** @_ZTIi to i8*), i8* null) noreturn
to label %.noexc unwind label %return
.noexc:
unreachable
return:
%1 = landingpad { i8*, i32 }
catch i8* null
%2 = extractvalue { i8*, i32 } %1, 0
%3 = tail call i8* @__cxa_begin_catch(i8* %2) nounwind
tail call void @__cxa_end_catch()
ret i32 0
}
declare i32 @__gxx_personality_v0(...)
declare i8* @__cxa_begin_catch(i8*) local_unnamed_addr
declare void @__cxa_end_catch() local_unnamed_addr
declare i8* @__cxa_allocate_exception(i32) local_unnamed_addr
declare void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr

View File

@ -0,0 +1,68 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=mips -mcpu=mips32r2 -mattr=+micromips \
; RUN: -show-mc-encoding < %s | FileCheck --check-prefix=MM2 %s
; RUN: llc -mtriple=mips -mcpu=mips32r6 -mattr=+micromips \
; RUN: -show-mc-encoding < %s | FileCheck --check-prefix=MM6 %s
; Branch on a double-precision compare. %a is compared against 0.0 with an
; ordered greater-than; both successors fall through to %return, which returns
; %a unchanged. %b and %mul are never used. The generated assertions below
; record the expected instructions and encodings for the mips32r2 and mips32r6
; microMIPS RUN lines respectively.
define double @foo(double %a, double %b) {
; MM2-LABEL: foo:
; MM2: # %bb.0: # %entry
; MM2-NEXT: mov.d $f0, $f12 # encoding: [0x54,0x0c,0x20,0x7b]
; MM2-NEXT: mtc1 $zero, $f2 # encoding: [0x54,0x02,0x28,0x3b]
; MM2-NEXT: mthc1 $zero, $f2 # encoding: [0x54,0x02,0x38,0x3b]
; MM2-NEXT: c.ule.d $f12, $f2 # encoding: [0x54,0x4c,0x05,0xfc]
; MM2-NEXT: bc1t $BB0_2 # encoding: [0x43,0xa0,A,A]
; MM2-NEXT: # fixup A - offset: 0, value: ($BB0_2), kind: fixup_MICROMIPS_PC16_S1
; MM2-NEXT: nop # encoding: [0x00,0x00,0x00,0x00]
; MM2-NEXT: # %bb.1: # %entry
; MM2-NEXT: j $BB0_2 # encoding: [0b110101AA,A,A,A]
; MM2-NEXT: # fixup A - offset: 0, value: ($BB0_2), kind: fixup_MICROMIPS_26_S1
; MM2-NEXT: nop # encoding: [0x00,0x00,0x00,0x00]
; MM2-NEXT: $BB0_2: # %return
; MM2-NEXT: jrc $ra # encoding: [0x45,0xbf]
;
; MM6-LABEL: foo:
; MM6: # %bb.0: # %entry
; MM6-NEXT: mov.d $f0, $f12 # encoding: [0x46,0x20,0x60,0x06]
; MM6-NEXT: mtc1 $zero, $f1 # encoding: [0x54,0x01,0x28,0x3b]
; MM6-NEXT: mthc1 $zero, $f1 # encoding: [0x54,0x01,0x38,0x3b]
; MM6-NEXT: cmp.ule.d $f1, $f12, $f1 # encoding: [0x54,0x2c,0x09,0xd5]
; MM6-NEXT: mfc1 $2, $f1 # encoding: [0x54,0x41,0x20,0x3b]
; MM6-NEXT: andi16 $2, $2, 1 # encoding: [0x2d,0x21]
; MM6-NEXT: jrc $ra # encoding: [0x45,0xbf]
entry:
%cmp = fcmp ogt double %a, 0.000000e+00
br i1 %cmp, label %if.end, label %if.else
if.else:
br label %return
if.end:
; Dead value: nothing reads %mul.
%mul = fmul double %a, 2.000000e+00
br label %return
return:
ret double %a
}
; Select on a double-precision compare: returns %x when %x is ordered
; less-than %y, otherwise %y. The generated assertions below record the
; expected instructions and encodings for the mips32r2 and mips32r6 microMIPS
; RUN lines respectively.
; For the FIXME further down: the r6 expectation for mtc1 in this function is
; [0x44,0x81,0x00,0x00], whereas the mtc1 expected in @foo starts with 0x54.
define double @bar(double %x, double %y) {
; MM2-LABEL: bar:
; MM2: # %bb.0: # %entry
; MM2-NEXT: mov.d $f0, $f14 # encoding: [0x54,0x0e,0x20,0x7b]
; MM2-NEXT: c.olt.d $f12, $f14 # encoding: [0x55,0xcc,0x05,0x3c]
; MM2-NEXT: jr $ra # encoding: [0x00,0x1f,0x0f,0x3c]
; MM2-NEXT: movt.d $f0, $f12, $fcc0 # encoding: [0x54,0x0c,0x02,0x60]
;
; MM6-LABEL: bar:
; MM6: # %bb.0: # %entry
; MM6-NEXT: cmp.lt.d $f0, $f12, $f14 # encoding: [0x55,0xcc,0x01,0x15]
; MM6-NEXT: mfc1 $1, $f0 # encoding: [0x54,0x20,0x20,0x3b]
; MM6-NEXT: mtc1 $1, $f0 # encoding: [0x44,0x81,0x00,0x00]
; MM6-NEXT: sel.d $f0, $f14, $f12 # encoding: [0x55,0x8e,0x02,0xb8]
; MM6-NEXT: jrc $ra # encoding: [0x45,0xbf]
; FIXME: mtc1 is encoded as a regular non-microMIPS instruction
entry:
%z = fcmp olt double %x, %y
%r = select i1 %z, double %x, double %y
ret double %r
}

View File

@ -0,0 +1,150 @@
# RUN: llc -o - %s -mtriple=mips-unknown-linux-gnu -enable-shrink-wrap=true \
# RUN: -start-before=shrink-wrap -stop-after=prologepilog | FileCheck %s
# Embedded LLVM IR module. It declares the external callee @foo and provides
# one empty function for each machine function defined in the MIR documents
# that follow; the names here match the `name:` fields of those documents.
--- |
declare void @foo()
define void @testBuildPairF64() {
ret void
}
define void @testBuildPairF64_64() {
ret void
}
define void @testBuildPairF64implicitSp() {
ret void
}
define void @testBuildPairF64_64implicitSp() {
ret void
}
define void @testExtractElementF64() {
ret void
}
define void @testExtractElementF64_64() {
ret void
}
define void @testExtractElementF64implicitSp() {
ret void
}
define void @testExtractElementF64_64implicitSp() {
ret void
}
...
# BuildPairF64 in bb.0 with no stack-pointer operand, a call to @foo in bb.1
# and the return in bb.2. The expectations require the BuildPairF64 line to
# come directly after the successors line and the blank line of bb.0, so no
# stack adjustment may be placed at the top of that block.
---
name: testBuildPairF64
# CHECK-LABEL: name: testBuildPairF64
# CHECK: bb.0
# CHECK-NEXT: successors
# CHECK-NEXT: {{[[:space:]]$}}
# CHECK-NEXT: BuildPairF64
body: |
bb.0:
$d0 = BuildPairF64 $zero, $zero
bb.1:
JAL @foo, implicit-def $ra
bb.2:
RetRA
...
# Same shape as testBuildPairF64 but with the BuildPairF64_64 pseudo writing
# $d0_64. The expectations again require the pseudo to be the first
# instruction printed for bb.0, i.e. no stack adjustment ahead of it.
---
name: testBuildPairF64_64
# CHECK-LABEL: name: testBuildPairF64_64
# CHECK: bb.0
# CHECK-NEXT: successors
# CHECK-NEXT: {{[[:space:]]$}}
# CHECK-NEXT: BuildPairF64_64
body: |
bb.0:
$d0_64 = BuildPairF64_64 $zero, $zero
bb.1:
JAL @foo, implicit-def $ra
bb.2:
RetRA
...
# Variant of testBuildPairF64 in which the pseudo carries an `implicit $sp`
# operand. Here the expectations require bb.0 to open with the stack
# adjustment (`$sp = ADDiu $sp, -N`), so the prologue has to be emitted in the
# block that contains the pseudo.
---
name: testBuildPairF64implicitSp
# CHECK-LABEL: name: testBuildPairF64implicitSp
# CHECK: bb.0
# CHECK-NEXT: successors
# CHECK-NEXT: {{[[:space:]]$}}
# CHECK-NEXT: $sp = ADDiu $sp, -{{[0-9]+}}
body: |
bb.0:
$d0 = BuildPairF64 $zero, $zero, implicit $sp
bb.1:
JAL @foo, implicit-def $ra
bb.2:
RetRA
...
# Variant of testBuildPairF64_64 in which the pseudo carries an `implicit $sp`
# operand. The expectations require bb.0 to open with the stack adjustment
# (`$sp = ADDiu $sp, -N`).
---
name: testBuildPairF64_64implicitSp
# CHECK-LABEL: name: testBuildPairF64_64implicitSp
# CHECK: bb.0
# CHECK-NEXT: successors
# CHECK-NEXT: {{[[:space:]]$}}
# CHECK-NEXT: $sp = ADDiu $sp, -{{[0-9]+}}
body: |
bb.0:
$d0_64 = BuildPairF64_64 $zero, $zero, implicit $sp
bb.1:
JAL @foo, implicit-def $ra
bb.2:
RetRA
...
# ExtractElementF64 reading element 1 of $d6 into $at, with no stack-pointer
# operand, followed by a call to @foo in bb.1 and the return in bb.2. The
# expectations require the pseudo to be the first instruction printed for
# bb.0, i.e. no stack adjustment ahead of it.
---
name: testExtractElementF64
# CHECK-LABEL: name: testExtractElementF64
# CHECK: bb.0
# CHECK-NEXT: successors
# CHECK-NEXT: {{[[:space:]]$}}
# CHECK-NEXT: ExtractElementF64
body: |
bb.0:
$at = ExtractElementF64 $d6, 1
bb.1:
JAL @foo, implicit-def $ra
bb.2:
RetRA
...
# Same shape as testExtractElementF64 but with the ExtractElementF64_64 pseudo
# reading $d12_64. The expectations require the pseudo to be the first
# instruction printed for bb.0.
---
name: testExtractElementF64_64
# CHECK-LABEL: name: testExtractElementF64_64
# CHECK: bb.0
# CHECK-NEXT: successors
# CHECK-NEXT: {{[[:space:]]$}}
# CHECK-NEXT: ExtractElementF64_64
body: |
bb.0:
$at = ExtractElementF64_64 $d12_64, 1
bb.1:
JAL @foo, implicit-def $ra
bb.2:
RetRA
...
# Variant of testExtractElementF64 in which the pseudo carries an
# `implicit $sp` operand. The expectations require bb.0 to open with the stack
# adjustment (`$sp = ADDiu $sp, -N`).
---
name: testExtractElementF64implicitSp
# CHECK-LABEL: name: testExtractElementF64implicitSp
# CHECK: bb.0
# CHECK-NEXT: successors
# CHECK-NEXT: {{[[:space:]]$}}
# CHECK-NEXT: $sp = ADDiu $sp, -{{[0-9]+}}
body: |
bb.0:
$at = ExtractElementF64 $d6, 1, implicit $sp
bb.1:
JAL @foo, implicit-def $ra
bb.2:
RetRA
...
# Variant of testExtractElementF64_64 in which the pseudo carries an
# `implicit $sp` operand. The expectations require bb.0 to open with the stack
# adjustment (`$sp = ADDiu $sp, -N`).
---
name: testExtractElementF64_64implicitSp
# CHECK-LABEL: name: testExtractElementF64_64implicitSp
# CHECK: bb.0
# CHECK-NEXT: successors
# CHECK-NEXT: {{[[:space:]]$}}
# CHECK-NEXT: $sp = ADDiu $sp, -{{[0-9]+}}
body: |
bb.0:
$at = ExtractElementF64_64 $d12_64, 1, implicit $sp
bb.1:
JAL @foo, implicit-def $ra
bb.2:
RetRA
...

View File

@ -48,14 +48,14 @@ entry:
; STATIC32-LABEL: f1:
; STATIC32: lui $[[R0:[0-9]+]], %tprel_hi(t1)
; STATIC32: addiu $[[R1:[0-9]+]], $[[R0]], %tprel_lo(t1)
; STATIC32: rdhwr $3, $29
; STATIC32: rdhwr $3, $29{{$}}
; STATIC32: addu $[[R2:[0-9]+]], $3, $[[R1]]
; STATIC32: lw $2, 0($[[R2]])
; STATIC64-LABEL: f1:
; STATIC64: lui $[[R0:[0-9]+]], %tprel_hi(t1)
; STATIC64: daddiu $[[R1:[0-9]+]], $[[R0]], %tprel_lo(t1)
; STATIC64: rdhwr $3, $29, 0
; STATIC64: rdhwr $3, $29{{$}}
; STATIC64: daddu $[[R2:[0-9]+]], $3, $[[R0]]
; STATIC64: lw $2, 0($[[R2]])
}
@ -101,7 +101,7 @@ entry:
; STATIC32-LABEL: f2:
; STATIC32: lui $[[R0:[0-9]+]], %hi(__gnu_local_gp)
; STATIC32: addiu $[[GP:[0-9]+]], $[[R0]], %lo(__gnu_local_gp)
; STATIC32: rdhwr $3, $29
; STATIC32: rdhwr $3, $29{{$}}
; STATIC32: lw $[[R0:[0-9]+]], %gottprel(t2)($[[GP]])
; STATIC32: addu $[[R1:[0-9]+]], $3, $[[R0]]
; STATIC32: lw $2, 0($[[R1]])
@ -109,7 +109,7 @@ entry:
; STATIC64-LABEL: f2:
; STATIC64: lui $[[R0:[0-9]+]], %hi(%neg(%gp_rel(f2)))
; STATIC64: daddiu $[[GP:[0-9]+]], $[[R0]], %lo(%neg(%gp_rel(f2)))
; STATIC64: rdhwr $3, $29
; STATIC64: rdhwr $3, $29{{$}}
; STATIC64: ld $[[R0:[0-9]+]], %gottprel(t2)($[[GP]])
; STATIC64: daddu $[[R1:[0-9]+]], $3, $[[R0]]
; STATIC64: lw $2, 0($[[R1]])

View File

@ -1,35 +1,46 @@
; RUN: llc < %s -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-P8
; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
; RUN: | FileCheck %s --check-prefix=CHECK-P8
; RUN: llc < %s -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-P9
; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
; RUN: | FileCheck %s --check-prefix=CHECK-P9
@a = external local_unnamed_addr global <4 x i32>, align 16
@pb = external local_unnamed_addr global float*, align 8
define void @testExpandPostRAPseudo(i32* nocapture readonly %ptr) {
; CHECK-P8-LABEL: testExpandPostRAPseudo:
; CHECK-P8: lxsiwax 34, 0, 3
; CHECK-P8-NEXT: xxspltw 34, 34, 1
; CHECK-P8-NEXT: stvx 2, 0, 4
; CHECK-P8: #APP
; CHECK-P8-NEXT: #Clobber Rigisters
; CHECK-P8-NEXT: #NO_APP
; CHECK-P8-NEXT: lis 4, 1024
; CHECK-P8-NEXT: lfiwax 0, 0, 3
; CHECK-P8: stfsx 0, 3, 4
; CHECK-P8-NEXT: blr
; CHECK-P9-LABEL: testExpandPostRAPseudo:
; CHECK-P9: lxvwsx 0, 0, 3
; CHECK-P9: stxvx 0, 0, 4
; CHECK-P9: #APP
; CHECK-P9-NEXT: #Clobber Rigisters
; CHECK-P9-NEXT: #NO_APP
; CHECK-P9-NEXT: lis 4, 1024
; CHECK-P9-NEXT: lfiwax 0, 0, 3
; CHECK-P9: stfsx 0, 3, 4
; CHECK-P9-NEXT: blr
; CHECK-P8-LABEL: testExpandPostRAPseudo:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8: lfiwzx f0, 0, r3
; CHECK-P8: ld r4, .LC0@toc@l(r4)
; CHECK-P8: xxpermdi vs0, f0, f0, 2
; CHECK-P8: xxspltw v2, vs0, 3
; CHECK-P8: stvx v2, 0, r4
; CHECK-P8: lis r4, 1024
; CHECK-P8: lfiwax f0, 0, r3
; CHECK-P8: addis r3, r2, .LC1@toc@ha
; CHECK-P8: ld r3, .LC1@toc@l(r3)
; CHECK-P8: xscvsxdsp f0, f0
; CHECK-P8: ld r3, 0(r3)
; CHECK-P8: stfsx f0, r3, r4
; CHECK-P8: blr
;
; CHECK-P9-LABEL: testExpandPostRAPseudo:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9: lfiwzx f0, 0, r3
; CHECK-P9: addis r4, r2, .LC0@toc@ha
; CHECK-P9: ld r4, .LC0@toc@l(r4)
; CHECK-P9: xxpermdi vs0, f0, f0, 2
; CHECK-P9: xxspltw vs0, vs0, 3
; CHECK-P9: stxvx vs0, 0, r4
; CHECK-P9: lis r4, 1024
; CHECK-P9: lfiwax f0, 0, r3
; CHECK-P9: addis r3, r2, .LC1@toc@ha
; CHECK-P9: ld r3, .LC1@toc@l(r3)
; CHECK-P9: xscvsxdsp f0, f0
; CHECK-P9: ld r3, 0(r3)
; CHECK-P9: stfsx f0, r3, r4
; CHECK-P9: blr
entry:
%0 = load i32, i32* %ptr, align 4
%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0

View File

@ -109,8 +109,8 @@
;vector int spltRegVali(int val) { //
; return (vector int) val; //
;} //
;// P8: lxsiwax, xxspltw //
;// P9: lxvwsx //
;// P8: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw //
;// P9: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw //
;vector int spltMemVali(int *ptr) { //
; return (vector int)*ptr; //
;} //
@ -284,8 +284,8 @@
;vector unsigned int spltRegValui(unsigned int val) { //
; return (vector unsigned int) val; //
;} //
;// P8: lxsiwax, xxspltw //
;// P9: lxvwsx //
;// P8: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw //
;// P9: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw //
;vector unsigned int spltMemValui(unsigned int *ptr) { //
; return (vector unsigned int)*ptr; //
;} //
@ -1202,15 +1202,21 @@ entry:
; P9LE-LABEL: spltMemVali
; P8BE-LABEL: spltMemVali
; P8LE-LABEL: spltMemVali
; P9BE: lxvwsx v2, 0, r3
; P9BE: lfiwzx f0, 0, r3
; P9BE: xxsldwi vs0, f0, f0, 1
; P9BE: xxspltw v2, vs0, 0
; P9BE: blr
; P9LE: lxvwsx v2, 0, r3
; P9LE: lfiwzx f0, 0, r3
; P9LE: xxpermdi vs0, f0, f0, 2
; P9LE: xxspltw v2, vs0, 3
; P9LE: blr
; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3
; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1
; P8BE: lfiwzx f0, 0, r3
; P8BE: xxsldwi vs0, f0, f0, 1
; P8BE: xxspltw v2, vs0, 0
; P8BE: blr
; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3
; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1
; P8LE: lfiwzx f0, 0, r3
; P8LE: xxpermdi vs0, f0, f0, 2
; P8LE: xxspltw v2, vs0, 3
; P8LE: blr
}
@ -2338,15 +2344,21 @@ entry:
; P9LE-LABEL: spltMemValui
; P8BE-LABEL: spltMemValui
; P8LE-LABEL: spltMemValui
; P9BE: lxvwsx v2, 0, r3
; P9BE: lfiwzx f0, 0, r3
; P9BE: xxsldwi vs0, f0, f0, 1
; P9BE: xxspltw v2, vs0, 0
; P9BE: blr
; P9LE: lxvwsx v2, 0, r3
; P9LE: lfiwzx f0, 0, r3
; P9LE: xxpermdi vs0, f0, f0, 2
; P9LE: xxspltw v2, vs0, 3
; P9LE: blr
; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3
; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1
; P8BE: lfiwzx f0, 0, r3
; P8BE: xxsldwi vs0, f0, f0, 1
; P8BE: xxspltw v2, vs0, 0
; P8BE: blr
; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3
; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1
; P8LE: lfiwzx f0, 0, r3
; P8LE: xxpermdi vs0, f0, f0, 2
; P8LE: xxspltw v2, vs0, 3
; P8LE: blr
}

View File

@ -1,15 +1,27 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s \
; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names | FileCheck --check-prefix=CHECK-LE \
; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck \
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s \
; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names | FileCheck \
; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s
define <16 x i8> @test(i32* %s, i32* %t) {
; CHECK-LE-LABEL: test:
; CHECK-LE: # %bb.0: # %entry
; CHECK-LE-NEXT: lfiwzx f0, 0, r3
; CHECK-LE-NEXT: xxpermdi vs0, f0, f0, 2
; CHECK-LE-NEXT: xxspltw v2, vs0, 3
; CHECK-LE-NEXT: blr
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfiwzx f0, 0, r3
; CHECK-NEXT: xxsldwi vs0, f0, f0, 1
; CHECK-NEXT: xxspltw v2, vs0, 0
; CHECK-NEXT: blr
entry:
%0 = bitcast i32* %s to <4 x i8>*
%1 = load <4 x i8>, <4 x i8>* %0, align 4
%2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
ret <16 x i8> %2
; CHECK-LABEL: test
; CHECK: lxsiwax 34, 0, 3
; CHECK: xxspltw 34, 34, 1
}

View File

@ -1,47 +1,74 @@
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \
; RUN: --check-prefix=CHECK-BE
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu -ppc-vsr-nums-as-vr \
; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s --check-prefix=CHECK-BE
@Globi = external global i32, align 4
@Globf = external global float, align 4
define <2 x i64> @test1(i64 %a, i64 %b) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mtvsrdd v2, r4, r3
; CHECK-NEXT: blr
; CHECK-BE-LABEL: test1:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mtvsrdd v2, r3, r4
; CHECK-BE-NEXT: blr
entry:
; The FIXME below is due to the lowering for BUILD_VECTOR needing a re-vamp
; which will happen in a subsequent patch.
; CHECK-LABEL: test1
; CHECK: mtvsrdd 34, 4, 3
; CHECK-BE-LABEL: test1
; CHECK-BE: mtvsrdd 34, 3, 4
%vecins = insertelement <2 x i64> undef, i64 %a, i32 0
%vecins1 = insertelement <2 x i64> %vecins, i64 %b, i32 1
ret <2 x i64> %vecins1
}
define i64 @test2(<2 x i64> %a) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mfvsrld r3, v2
; CHECK-NEXT: blr
; CHECK-BE-LABEL: test2:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mfvsrd r3, v2
; CHECK-BE-NEXT: blr
entry:
; CHECK-LABEL: test2
; CHECK: mfvsrld 3, 34
%0 = extractelement <2 x i64> %a, i32 0
ret i64 %0
}
define i64 @test3(<2 x i64> %a) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mfvsrd r3, v2
; CHECK-NEXT: blr
; CHECK-BE-LABEL: test3:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: mfvsrld r3, v2
; CHECK-BE-NEXT: blr
entry:
; CHECK-BE-LABEL: test3
; CHECK-BE: mfvsrld 3, 34
%0 = extractelement <2 x i64> %a, i32 1
ret i64 %0
}
define <4 x i32> @test4(i32* nocapture readonly %in) {
; CHECK-LABEL: test4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfiwzx f0, 0, r3
; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
; CHECK-NEXT: xxspltw v2, vs0, 3
; CHECK-NEXT: blr
; CHECK-BE-LABEL: test4:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lfiwzx f0, 0, r3
; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
; CHECK-BE-NEXT: xxspltw v2, vs0, 0
; CHECK-BE-NEXT: blr
entry:
; CHECK-LABEL: test4
; CHECK: lxvwsx 34, 0, 3
; CHECK-NOT: xxspltw
; CHECK-BE-LABEL: test4
; CHECK-BE: lxvwsx 34, 0, 3
; CHECK-BE-NOT: xxspltw
%0 = load i32, i32* %in, align 4
%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
@ -49,13 +76,20 @@ entry:
}
define <4 x float> @test5(float* nocapture readonly %in) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfiwzx f0, 0, r3
; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
; CHECK-NEXT: xxspltw v2, vs0, 3
; CHECK-NEXT: blr
; CHECK-BE-LABEL: test5:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lfiwzx f0, 0, r3
; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
; CHECK-BE-NEXT: xxspltw v2, vs0, 0
; CHECK-BE-NEXT: blr
entry:
; CHECK-LABEL: test5
; CHECK: lxvwsx 34, 0, 3
; CHECK-NOT: xxspltw
; CHECK-BE-LABEL: test5
; CHECK-BE: lxvwsx 34, 0, 3
; CHECK-BE-NOT: xxspltw
%0 = load float, float* %in, align 4
%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
@ -63,17 +97,24 @@ entry:
}
define <4 x i32> @test6() {
; CHECK-LABEL: test6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
; CHECK-NEXT: lfiwzx f0, 0, r3
; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
; CHECK-NEXT: xxspltw v2, vs0, 3
; CHECK-NEXT: blr
; CHECK-BE-LABEL: test6:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-BE-NEXT: ld r3, .LC0@toc@l(r3)
; CHECK-BE-NEXT: lfiwzx f0, 0, r3
; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
; CHECK-BE-NEXT: xxspltw v2, vs0, 0
; CHECK-BE-NEXT: blr
entry:
; CHECK-LABEL: test6
; CHECK: addis
; CHECK: ld [[TOC:[0-9]+]], .LC0
; CHECK: lxvwsx 34, 0, 3
; CHECK-NOT: xxspltw
; CHECK-BE-LABEL: test6
; CHECK-BE: addis
; CHECK-BE: ld [[TOC:[0-9]+]], .LC0
; CHECK-BE: lxvwsx 34, 0, 3
; CHECK-BE-NOT: xxspltw
%0 = load i32, i32* @Globi, align 4
%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
@ -81,17 +122,24 @@ entry:
}
define <4 x float> @test7() {
; CHECK-LABEL: test7:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r3, r2, .LC1@toc@ha
; CHECK-NEXT: ld r3, .LC1@toc@l(r3)
; CHECK-NEXT: lfiwzx f0, 0, r3
; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
; CHECK-NEXT: xxspltw v2, vs0, 3
; CHECK-NEXT: blr
; CHECK-BE-LABEL: test7:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: addis r3, r2, .LC1@toc@ha
; CHECK-BE-NEXT: ld r3, .LC1@toc@l(r3)
; CHECK-BE-NEXT: lfiwzx f0, 0, r3
; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
; CHECK-BE-NEXT: xxspltw v2, vs0, 0
; CHECK-BE-NEXT: blr
entry:
; CHECK-LABEL: test7
; CHECK: addis
; CHECK: ld [[TOC:[0-9]+]], .LC1
; CHECK: lxvwsx 34, 0, 3
; CHECK-NOT: xxspltw
; CHECK-BE-LABEL: test7
; CHECK-BE: addis
; CHECK-BE: ld [[TOC:[0-9]+]], .LC1
; CHECK-BE: lxvwsx 34, 0, 3
; CHECK-BE-NOT: xxspltw
%0 = load float, float* @Globf, align 4
%splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
@ -99,76 +147,120 @@ entry:
}
define <16 x i8> @test8() {
; CHECK-LABEL: test8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v2, v2, v2
; CHECK-NEXT: blr
; CHECK-BE-LABEL: test8:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxlxor v2, v2, v2
; CHECK-BE-NEXT: blr
entry:
; CHECK-LABEL: test8
; CHECK: xxlxor 34, 34, 34
; CHECK-BE-LABEL: test8
; CHECK-BE: xxlxor 34, 34, 34
ret <16 x i8> zeroinitializer
}
define <16 x i8> @test9() {
; CHECK-LABEL: test9:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v2, 1
; CHECK-NEXT: blr
; CHECK-BE-LABEL: test9:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxspltib v2, 1
; CHECK-BE-NEXT: blr
entry:
; CHECK-LABEL: test9
; CHECK: xxspltib 34, 1
; CHECK-BE-LABEL: test9
; CHECK-BE: xxspltib 34, 1
ret <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
}
define <16 x i8> @test10() {
; CHECK-LABEL: test10:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v2, 127
; CHECK-NEXT: blr
; CHECK-BE-LABEL: test10:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxspltib v2, 127
; CHECK-BE-NEXT: blr
entry:
; CHECK-LABEL: test10
; CHECK: xxspltib 34, 127
; CHECK-BE-LABEL: test10
; CHECK-BE: xxspltib 34, 127
ret <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>
}
define <16 x i8> @test11() {
; CHECK-LABEL: test11:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v2, 128
; CHECK-NEXT: blr
; CHECK-BE-LABEL: test11:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxspltib v2, 128
; CHECK-BE-NEXT: blr
entry:
; CHECK-LABEL: test11
; CHECK: xxspltib 34, 128
; CHECK-BE-LABEL: test11
; CHECK-BE: xxspltib 34, 128
ret <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
}
define <16 x i8> @test12() {
; CHECK-LABEL: test12:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v2, 255
; CHECK-NEXT: blr
; CHECK-BE-LABEL: test12:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxspltib v2, 255
; CHECK-BE-NEXT: blr
entry:
; CHECK-LABEL: test12
; CHECK: xxspltib 34, 255
; CHECK-BE-LABEL: test12
; CHECK-BE: xxspltib 34, 255
ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
}
define <16 x i8> @test13() {
; CHECK-LABEL: test13:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v2, 129
; CHECK-NEXT: blr
; CHECK-BE-LABEL: test13:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxspltib v2, 129
; CHECK-BE-NEXT: blr
entry:
; CHECK-LABEL: test13
; CHECK: xxspltib 34, 129
; CHECK-BE-LABEL: test13
; CHECK-BE: xxspltib 34, 129
ret <16 x i8> <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
}
define <16 x i8> @test13E127() {
; CHECK-LABEL: test13E127:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxspltib v2, 200
; CHECK-NEXT: blr
; CHECK-BE-LABEL: test13E127:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: xxspltib v2, 200
; CHECK-BE-NEXT: blr
entry:
; CHECK-LABEL: test13E127
; CHECK: xxspltib 34, 200
; CHECK-BE-LABEL: test13E127
; CHECK-BE: xxspltib 34, 200
ret <16 x i8> <i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200, i8 200>
}
define <4 x i32> @test14(<4 x i32> %a, i32* nocapture readonly %b) {
; CHECK-LABEL: test14:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lwz r3, 0(r5)
; CHECK-NEXT: mtvsrws v2, r3
; CHECK-NEXT: addi r3, r3, 5
; CHECK-NEXT: stw r3, 0(r5)
; CHECK-NEXT: blr
; CHECK-BE-LABEL: test14:
; CHECK-BE: # %bb.0: # %entry
; CHECK-BE-NEXT: lwz r3, 0(r5)
; CHECK-BE-NEXT: mtvsrws v2, r3
; CHECK-BE-NEXT: addi r3, r3, 5
; CHECK-BE-NEXT: stw r3, 0(r5)
; CHECK-BE-NEXT: blr
entry:
; CHECK-LABEL: test14
; CHECK: lwz [[LD:[0-9]+]],
; CHECK: mtvsrws 34, [[LD]]
; CHECK-BE-LABEL: test14
; CHECK-BE: lwz [[LD:[0-9]+]],
; CHECK-BE: mtvsrws 34, [[LD]]
%0 = load i32, i32* %b, align 4
%splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer

View File

@ -11,9 +11,8 @@ declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #0
define void @draw_llvm_vs_variant0() {
; CHECK-LABEL: draw_llvm_vs_variant0:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ldx r3, 0, r3
; CHECK-NEXT: mtvsrd f0, r3
; CHECK-NEXT: xxswapd v2, vs0
; CHECK-NEXT: lfd f0, 0(r3)
; CHECK-NEXT: xxpermdi v2, f0, f0, 2
; CHECK-NEXT: vmrglh v2, v2, v2
; CHECK-NEXT: vextsh2w v2, v2
; CHECK-NEXT: xvcvsxwsp vs0, v2

View File

@ -1,35 +1,44 @@
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-bgq-linux"
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s
; Function Attrs: norecurse nounwind readonly
define <4 x double> @foo(double* nocapture readonly %a) #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxvdsx v2, 0, r3
; CHECK-NEXT: vmr v3, v2
; CHECK-NEXT: blr
entry:
%0 = load double, double* %a, align 8
%vecinit.i = insertelement <4 x double> undef, double %0, i32 0
%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
ret <4 x double> %shuffle.i
; CHECK-LABEL: @foo
; CHECK: lfd 1, 0(3)
; CHECK: blr
}
define <4 x double> @foox(double* nocapture readonly %a, i64 %idx) #0 {
; CHECK-LABEL: foox:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r4, r4, 3
; CHECK-NEXT: lxvdsx v2, r3, r4
; CHECK-NEXT: vmr v3, v2
; CHECK-NEXT: blr
entry:
%p = getelementptr double, double* %a, i64 %idx
%0 = load double, double* %p, align 8
%vecinit.i = insertelement <4 x double> undef, double %0, i32 0
%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
ret <4 x double> %shuffle.i
; CHECK-LABEL: @foox
; CHECK: sldi [[REG1:[0-9]+]], 4, 3
; CHECK: lfdx 1, 3, [[REG1]]
; CHECK: blr
}
define <4 x double> @fooxu(double* nocapture readonly %a, i64 %idx, double** %pptr) #0 {
; CHECK-LABEL: fooxu:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r4, r4, 3
; CHECK-NEXT: lfdux f0, r3, r4
; CHECK-NEXT: xxspltd v2, vs0, 0
; CHECK-NEXT: std r3, 0(r5)
; CHECK-NEXT: vmr v3, v2
; CHECK-NEXT: blr
entry:
%p = getelementptr double, double* %a, i64 %idx
%0 = load double, double* %p, align 8
@ -37,39 +46,36 @@ entry:
%shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
store double* %p, double** %pptr, align 8
ret <4 x double> %shuffle.i
; CHECK-LABEL: @foox
; CHECK: sldi [[REG1:[0-9]+]], 4, 3
; CHECK: lfdux 1, 3, [[REG1]]
; CHECK: std 3, 0(5)
; CHECK: blr
}
define <4 x float> @foof(float* nocapture readonly %a) #0 {
; CHECK-LABEL: foof:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lfiwzx f0, 0, r3
; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
; CHECK-NEXT: xxspltw v2, vs0, 3
; CHECK-NEXT: blr
entry:
%0 = load float, float* %a, align 4
%vecinit.i = insertelement <4 x float> undef, float %0, i32 0
%shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %shuffle.i
; CHECK-LABEL: @foof
; CHECK: lfs 1, 0(3)
; CHECK: blr
}
define <4 x float> @foofx(float* nocapture readonly %a, i64 %idx) #0 {
; CHECK-LABEL: foofx:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: sldi r4, r4, 2
; CHECK-NEXT: lfiwzx f0, r3, r4
; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
; CHECK-NEXT: xxspltw v2, vs0, 3
; CHECK-NEXT: blr
entry:
%p = getelementptr float, float* %a, i64 %idx
%0 = load float, float* %p, align 4
%vecinit.i = insertelement <4 x float> undef, float %0, i32 0
%shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %shuffle.i
; CHECK-LABEL: @foofx
; CHECK: sldi [[REG1:[0-9]+]], 4, 2
; CHECK: lfsx 1, 3, [[REG1]]
; CHECK: blr
}
attributes #0 = { norecurse nounwind readonly "target-cpu"="a2q" "target-features"="+qpx,-altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" }

View File

@ -0,0 +1,292 @@
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
; Function Attrs: norecurse nounwind readonly
; Loads an i64 through %int64 and inserts it into element 0 of %vec.
; Expectations are listed for the two pwr9 RUN lines only; the pwr8 RUN lines
; have no directives for this function.
define <2 x i64> @s2v_test1(i64* nocapture readonly %int64, <2 x i64> %vec) {
; P9LE-LABEL: s2v_test1:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 0(r3)
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test1:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lfd f0, 0(r3)
; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
; P9BE-NEXT: blr
entry:
%0 = load i64, i64* %int64, align 8
%vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
ret <2 x i64> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads the i64 at constant index 1 from %int64 (byte offset 8 in the expected
; lfd) and inserts it into element 0 of %vec.
; Expectations are listed for the two pwr9 RUN lines only.
define <2 x i64> @s2v_test2(i64* nocapture readonly %int64, <2 x i64> %vec) {
; P9LE-LABEL: s2v_test2:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 8(r3)
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test2:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lfd f0, 8(r3)
; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
; P9BE-NEXT: blr
entry:
%arrayidx = getelementptr inbounds i64, i64* %int64, i64 1
%0 = load i64, i64* %arrayidx, align 8
%vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
ret <2 x i64> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads the i64 at a run-time index (%Idx, sign-extended to i64) from %int64
; and inserts it into element 0 of %vec.
; Expectations are listed for the two pwr9 RUN lines only.
; NOTE(review): the big-endian label directive below has no trailing colon,
; unlike every other label directive in this file; it still matches, but is
; looser than its siblings.
define <2 x i64> @s2v_test3(i64* nocapture readonly %int64, <2 x i64> %vec, i32 signext %Idx) {
; P9LE-LABEL: s2v_test3:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r7, 3
; P9LE-NEXT: lfdx f0, r3, r4
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test3
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: sldi r4, r7, 3
; P9BE-NEXT: lfdx f0, r3, r4
; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
; P9BE-NEXT: blr
entry:
%idxprom = sext i32 %Idx to i64
%arrayidx = getelementptr inbounds i64, i64* %int64, i64 %idxprom
%0 = load i64, i64* %arrayidx, align 8
%vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
ret <2 x i64> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads the i64 at constant index 1 from %int64 and inserts it into element 0
; of %vec. Expectations are listed for the two pwr9 RUN lines only.
; NOTE(review): the IR body and the expected code are identical to
; @s2v_test2; possibly the source this was reduced from differed in a way that
; no longer shows at the IR level. Confirm whether the duplicate is intended.
define <2 x i64> @s2v_test4(i64* nocapture readonly %int64, <2 x i64> %vec) {
; P9LE-LABEL: s2v_test4:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 8(r3)
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test4:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lfd f0, 8(r3)
; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
; P9BE-NEXT: blr
entry:
%arrayidx = getelementptr inbounds i64, i64* %int64, i64 1
%0 = load i64, i64* %arrayidx, align 8
%vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
ret <2 x i64> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Same operation as @s2v_test1 with the parameters swapped: the vector comes
; first and the pointer second, so the expected load uses r5 as its base
; register instead of r3.
; Expectations are listed for the two pwr9 RUN lines only.
define <2 x i64> @s2v_test5(<2 x i64> %vec, i64* nocapture readonly %ptr1) {
; P9LE-LABEL: s2v_test5:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 0(r5)
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test5:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lfd f0, 0(r5)
; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
; P9BE-NEXT: blr
entry:
%0 = load i64, i64* %ptr1, align 8
%vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
ret <2 x i64> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads a double through %f64 and inserts it into element 0 of %vec.
; Expectations are listed for all four RUN lines (pwr9 and pwr8, little- and
; big-endian).
define <2 x double> @s2v_test_f1(double* nocapture readonly %f64, <2 x double> %vec) {
; P9LE-LABEL: s2v_test_f1:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 0(r3)
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f1:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lfd f0, 0(r3)
; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
; P9BE-NEXT: blr
; P8LE-LABEL: s2v_test_f1:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfdx f0, 0, r3
; P8LE-NEXT: xxspltd vs0, vs0, 0
; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f1:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: lfdx f0, 0, r3
; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
; P8BE-NEXT: blr
entry:
%0 = load double, double* %f64, align 8
%vecins = insertelement <2 x double> %vec, double %0, i32 0
ret <2 x double> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads the double at constant index 1 from %f64 and inserts it into element 0
; of %vec. The pwr9 expectations fold the 8-byte offset into the load, while
; the pwr8 expectations show a separate addi followed by an indexed load.
define <2 x double> @s2v_test_f2(double* nocapture readonly %f64, <2 x double> %vec) {
; P9LE-LABEL: s2v_test_f2:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 8(r3)
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f2:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lfd f0, 8(r3)
; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
; P9BE-NEXT: blr
; P8LE-LABEL: s2v_test_f2:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addi r3, r3, 8
; P8LE-NEXT: lfdx f0, 0, r3
; P8LE-NEXT: xxspltd vs0, vs0, 0
; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f2:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: addi r3, r3, 8
; P8BE-NEXT: lfdx f0, 0, r3
; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
; P8BE-NEXT: blr
entry:
%arrayidx = getelementptr inbounds double, double* %f64, i64 1
%0 = load double, double* %arrayidx, align 8
%vecins = insertelement <2 x double> %vec, double %0, i32 0
ret <2 x double> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads the double at a run-time index (%Idx, sign-extended to i64) from %f64
; and inserts it into element 0 of %vec.
; Expectations are listed for all four RUN lines.
define <2 x double> @s2v_test_f3(double* nocapture readonly %f64, <2 x double> %vec, i32 signext %Idx) {
; P9LE-LABEL: s2v_test_f3:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r7, 3
; P9LE-NEXT: lfdx f0, r3, r4
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f3:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: sldi r4, r7, 3
; P9BE-NEXT: lfdx f0, r3, r4
; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
; P9BE-NEXT: blr
; P8LE-LABEL: s2v_test_f3:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: sldi r4, r7, 3
; P8LE-NEXT: lfdx f0, r3, r4
; P8LE-NEXT: xxspltd vs0, vs0, 0
; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f3:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: sldi r4, r7, 3
; P8BE-NEXT: lfdx f0, r3, r4
; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
; P8BE-NEXT: blr
entry:
%idxprom = sext i32 %Idx to i64
%arrayidx = getelementptr inbounds double, double* %f64, i64 %idxprom
%0 = load double, double* %arrayidx, align 8
%vecins = insertelement <2 x double> %vec, double %0, i32 0
ret <2 x double> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads the double at constant index 1 from %f64 and inserts it into element 0
; of %vec. Expectations are listed for all four RUN lines.
; NOTE(review): the IR body and the expected code are identical to
; @s2v_test_f2; confirm whether the duplicate is intended.
define <2 x double> @s2v_test_f4(double* nocapture readonly %f64, <2 x double> %vec) {
; P9LE-LABEL: s2v_test_f4:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 8(r3)
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f4:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lfd f0, 8(r3)
; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
; P9BE-NEXT: blr
; P8LE-LABEL: s2v_test_f4:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addi r3, r3, 8
; P8LE-NEXT: lfdx f0, 0, r3
; P8LE-NEXT: xxspltd vs0, vs0, 0
; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f4:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: addi r3, r3, 8
; P8BE-NEXT: lfdx f0, 0, r3
; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
; P8BE-NEXT: blr
entry:
%arrayidx = getelementptr inbounds double, double* %f64, i64 1
%0 = load double, double* %arrayidx, align 8
%vecins = insertelement <2 x double> %vec, double %0, i32 0
ret <2 x double> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Same operation as @s2v_test_f1 with the parameters swapped: the vector comes
; first and the pointer second, so the expected loads use r5 as their base
; register instead of r3.
; Expectations are listed for all four RUN lines.
define <2 x double> @s2v_test_f5(<2 x double> %vec, double* nocapture readonly %ptr1) {
; P9LE-LABEL: s2v_test_f5:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfd f0, 0(r5)
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f5:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lfd f0, 0(r5)
; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
; P9BE-NEXT: blr
; P8LE-LABEL: s2v_test_f5:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfdx f0, 0, r5
; P8LE-NEXT: xxspltd vs0, vs0, 0
; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f5:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: lfdx f0, 0, r5
; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
; P8BE-NEXT: blr
entry:
%0 = load double, double* %ptr1, align 8
%vecins = insertelement <2 x double> %vec, double %0, i32 0
ret <2 x double> %vecins
}

View File

@ -0,0 +1,118 @@
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
; Loads one <1 x float> from each of %A and %B, adds them, and stores the sum
; to %C. All four configurations are expected to load the operands with lfiwzx.
define void @test_liwzx1(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) {
; P9LE-LABEL: test_liwzx1:
; P9LE: # %bb.0:
; P9LE-NEXT: lfiwzx f0, 0, r3
; P9LE-NEXT: lfiwzx f1, 0, r4
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxpermdi vs1, f1, f1, 2
; P9LE-NEXT: xvaddsp vs0, vs0, vs1
; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; P9LE-NEXT: xscvspdpn f0, vs0
; P9LE-NEXT: stfs f0, 0(r5)
; P9LE-NEXT: blr
; P9BE-LABEL: test_liwzx1:
; P9BE: # %bb.0:
; P9BE-NEXT: lfiwzx f0, 0, r3
; P9BE-NEXT: lfiwzx f1, 0, r4
; P9BE-NEXT: xxsldwi vs0, f0, f0, 1
; P9BE-NEXT: xxsldwi vs1, f1, f1, 1
; P9BE-NEXT: xvaddsp vs0, vs0, vs1
; P9BE-NEXT: xscvspdpn f0, vs0
; P9BE-NEXT: stfs f0, 0(r5)
; P9BE-NEXT: blr
; P8LE-LABEL: test_liwzx1:
; P8LE: # %bb.0:
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: lfiwzx f1, 0, r4
; P8LE-NEXT: xxpermdi vs0, f0, f0, 2
; P8LE-NEXT: xxpermdi vs1, f1, f1, 2
; P8LE-NEXT: xvaddsp vs0, vs0, vs1
; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; P8LE-NEXT: xscvspdpn f0, vs0
; P8LE-NEXT: stfsx f0, 0, r5
; P8LE-NEXT: blr
; P8BE-LABEL: test_liwzx1:
; P8BE: # %bb.0:
; P8BE-NEXT: lfiwzx f0, 0, r3
; P8BE-NEXT: lfiwzx f1, 0, r4
; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
; P8BE-NEXT: xxsldwi vs1, f1, f1, 1
; P8BE-NEXT: xvaddsp vs0, vs0, vs1
; P8BE-NEXT: xscvspdpn f0, vs0
; P8BE-NEXT: stfsx f0, 0, r5
; P8BE-NEXT: blr
%a = load <1 x float>, <1 x float>* %A
%b = load <1 x float>, <1 x float>* %B
%X = fadd <1 x float> %a, %b
store <1 x float> %X, <1 x float>* %C
ret void
}
; Loads one <1 x float> from each of %A and %B, subtracts %b from %a, stores
; the difference to %C, and returns %C itself (hence the expected mr r3, r5).
define <1 x float>* @test_liwzx2(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) {
; P9LE-LABEL: test_liwzx2:
; P9LE: # %bb.0:
; P9LE-NEXT: lfiwzx f0, 0, r3
; P9LE-NEXT: lfiwzx f1, 0, r4
; P9LE-NEXT: mr r3, r5
; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
; P9LE-NEXT: xxpermdi vs1, f1, f1, 2
; P9LE-NEXT: xvsubsp vs0, vs0, vs1
; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; P9LE-NEXT: xscvspdpn f0, vs0
; P9LE-NEXT: stfs f0, 0(r5)
; P9LE-NEXT: blr
; P9BE-LABEL: test_liwzx2:
; P9BE: # %bb.0:
; P9BE-NEXT: lfiwzx f0, 0, r3
; P9BE-NEXT: lfiwzx f1, 0, r4
; P9BE-NEXT: mr r3, r5
; P9BE-NEXT: xxsldwi vs0, f0, f0, 1
; P9BE-NEXT: xxsldwi vs1, f1, f1, 1
; P9BE-NEXT: xvsubsp vs0, vs0, vs1
; P9BE-NEXT: xscvspdpn f0, vs0
; P9BE-NEXT: stfs f0, 0(r5)
; P9BE-NEXT: blr
; P8LE-LABEL: test_liwzx2:
; P8LE: # %bb.0:
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: lfiwzx f1, 0, r4
; P8LE-NEXT: mr r3, r5
; P8LE-NEXT: xxpermdi vs0, f0, f0, 2
; P8LE-NEXT: xxpermdi vs1, f1, f1, 2
; P8LE-NEXT: xvsubsp vs0, vs0, vs1
; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; P8LE-NEXT: xscvspdpn f0, vs0
; P8LE-NEXT: stfsx f0, 0, r5
; P8LE-NEXT: blr
; P8BE-LABEL: test_liwzx2:
; P8BE: # %bb.0:
; P8BE-NEXT: lfiwzx f0, 0, r3
; P8BE-NEXT: lfiwzx f1, 0, r4
; P8BE-NEXT: mr r3, r5
; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
; P8BE-NEXT: xxsldwi vs1, f1, f1, 1
; P8BE-NEXT: xvsubsp vs0, vs0, vs1
; P8BE-NEXT: xscvspdpn f0, vs0
; P8BE-NEXT: stfsx f0, 0, r5
; P8BE-NEXT: blr
%a = load <1 x float>, <1 x float>* %A
%b = load <1 x float>, <1 x float>* %B
%X = fsub <1 x float> %a, %b
store <1 x float> %X, <1 x float>* %C
ret <1 x float>* %C
}

View File

@ -0,0 +1,265 @@
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
; Function Attrs: norecurse nounwind readonly
; Loads an i32 from %int32, sign-extends it to i64, and inserts it into
; element 0 of %vec. Every configuration is expected to load with lfiwax.
define <2 x i64> @s2v_test1(i32* nocapture readonly %int32, <2 x i64> %vec) {
; P9LE-LABEL: s2v_test1:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfiwax f0, 0, r3
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test1:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lfiwax f0, 0, r3
; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
; P9BE-NEXT: blr
; P8LE-LABEL: s2v_test1:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwax f0, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: xxpermdi v2, v2, v3, 1
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test1:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: lfiwax f0, 0, r3
; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
; P8BE-NEXT: blr
entry:
%0 = load i32, i32* %int32, align 4
%conv = sext i32 %0 to i64
%vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
ret <2 x i64> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads the i32 at constant index 1 of %int32, sign-extends it to i64, and
; inserts it into element 0 of %vec.
define <2 x i64> @s2v_test2(i32* nocapture readonly %int32, <2 x i64> %vec) {
; P9LE-LABEL: s2v_test2:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: addi r3, r3, 4
; P9LE-NEXT: lfiwax f0, 0, r3
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test2:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: addi r3, r3, 4
; P9BE-NEXT: lfiwax f0, 0, r3
; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
; P9BE-NEXT: blr
; P8LE-LABEL: s2v_test2:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: lfiwax f0, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: xxpermdi v2, v2, v3, 1
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test2:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: addi r3, r3, 4
; P8BE-NEXT: lfiwax f0, 0, r3
; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
; P8BE-NEXT: blr
entry:
%arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
%0 = load i32, i32* %arrayidx, align 4
%conv = sext i32 %0 to i64
%vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
ret <2 x i64> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads the i32 at the runtime index %Idx (sign-extended to i64) of %int32,
; sign-extends the loaded value to i64, and inserts it into element 0 of %vec.
; The expected code scales the index with sldi and uses an indexed lfiwax.
define <2 x i64> @s2v_test3(i32* nocapture readonly %int32, <2 x i64> %vec, i32 signext %Idx) {
; P9LE-LABEL: s2v_test3:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r7, 2
; P9LE-NEXT: lfiwax f0, r3, r4
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test3:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: sldi r4, r7, 2
; P9BE-NEXT: lfiwax f0, r3, r4
; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
; P9BE-NEXT: blr
; P8LE-LABEL: s2v_test3:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: sldi r4, r7, 2
; P8LE-NEXT: lfiwax f0, r3, r4
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: xxpermdi v2, v2, v3, 1
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test3:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: sldi r4, r7, 2
; P8BE-NEXT: lfiwax f0, r3, r4
; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
; P8BE-NEXT: blr
entry:
%idxprom = sext i32 %Idx to i64
%arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom
%0 = load i32, i32* %arrayidx, align 4
%conv = sext i32 %0 to i64
%vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
ret <2 x i64> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads the i32 at constant index 1 of %int32, sign-extends it to i64, and
; inserts it into element 0 of %vec. The IR body is the same as @s2v_test2
; in this file.
define <2 x i64> @s2v_test4(i32* nocapture readonly %int32, <2 x i64> %vec) {
; P9LE-LABEL: s2v_test4:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: addi r3, r3, 4
; P9LE-NEXT: lfiwax f0, 0, r3
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test4:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: addi r3, r3, 4
; P9BE-NEXT: lfiwax f0, 0, r3
; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
; P9BE-NEXT: blr
; P8LE-LABEL: s2v_test4:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: lfiwax f0, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: xxpermdi v2, v2, v3, 1
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test4:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: addi r3, r3, 4
; P8BE-NEXT: lfiwax f0, 0, r3
; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
; P8BE-NEXT: blr
entry:
%arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
%0 = load i32, i32* %arrayidx, align 4
%conv = sext i32 %0 to i64
%vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
ret <2 x i64> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads an i32 from %ptr1, sign-extends it to i64, and inserts it into
; element 0 of %vec. The vector is the first argument and the pointer the
; second; the expected code addresses the pointer through r5.
define <2 x i64> @s2v_test5(<2 x i64> %vec, i32* nocapture readonly %ptr1) {
; P9LE-LABEL: s2v_test5:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfiwax f0, 0, r5
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: xxpermdi v2, v2, v3, 1
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test5:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lfiwax f0, 0, r5
; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
; P9BE-NEXT: blr
; P8LE-LABEL: s2v_test5:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwax f0, 0, r5
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: xxpermdi v2, v2, v3, 1
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test5:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: lfiwax f0, 0, r5
; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
; P8BE-NEXT: blr
entry:
%0 = load i32, i32* %ptr1, align 4
%conv = sext i32 %0 to i64
%vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
ret <2 x i64> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads an i32 from %ptr, sign-extends it to i64, and splats it across a
; <2 x i64>: insertelement into element 0 of undef followed by a shufflevector
; with an all-zero mask.
define <2 x i64> @s2v_test6(i32* nocapture readonly %ptr) {
; P9LE-LABEL: s2v_test6:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfiwax f0, 0, r3
; P9LE-NEXT: xxpermdi v2, f0, f0, 2
; P9LE-NEXT: xxspltd v2, v2, 1
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test6:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lfiwax f0, 0, r3
; P9BE-NEXT: xxspltd v2, vs0, 0
; P9BE-NEXT: blr
; P8LE-LABEL: s2v_test6:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwax f0, 0, r3
; P8LE-NEXT: xxpermdi v2, f0, f0, 2
; P8LE-NEXT: xxspltd v2, v2, 1
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test6:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: lfiwax f0, 0, r3
; P8BE-NEXT: xxspltd v2, vs0, 0
; P8BE-NEXT: blr
entry:
%0 = load i32, i32* %ptr, align 4
%conv = sext i32 %0 to i64
%splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
%splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %splat.splat
}
; Function Attrs: norecurse nounwind readonly
; Loads an i32 from %ptr, sign-extends it to i64, and splats it across a
; <2 x i64> (insertelement into undef plus an all-zero shufflevector mask).
; The IR body is the same as @s2v_test6 in this file.
define <2 x i64> @s2v_test7(i32* nocapture readonly %ptr) {
; P9LE-LABEL: s2v_test7:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfiwax f0, 0, r3
; P9LE-NEXT: xxpermdi v2, f0, f0, 2
; P9LE-NEXT: xxspltd v2, v2, 1
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test7:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: lfiwax f0, 0, r3
; P9BE-NEXT: xxspltd v2, vs0, 0
; P9BE-NEXT: blr
; P8LE-LABEL: s2v_test7:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwax f0, 0, r3
; P8LE-NEXT: xxpermdi v2, f0, f0, 2
; P8LE-NEXT: xxspltd v2, v2, 1
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test7:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: lfiwax f0, 0, r3
; P8BE-NEXT: xxspltd v2, vs0, 0
; P8BE-NEXT: blr
entry:
%0 = load i32, i32* %ptr, align 4
%conv = sext i32 %0 to i64
%splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
%splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %splat.splat
}

View File

@ -0,0 +1,341 @@
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
; Function Attrs: norecurse nounwind readonly
; Loads an i32 from %int32 and inserts it into element 0 of the <4 x i32>
; %vec. Only the two pwr8 prefixes carry expectations for this function.
define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) {
; P8LE-LABEL: s2v_test1:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: addis r4, r2, .LCPI0_0@toc@ha
; P8LE-NEXT: addi r3, r4, .LCPI0_0@toc@l
; P8LE-NEXT: lvx v4, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: vperm v2, v3, v2, v4
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test1:
; P8BE: # %bb.0: # %entry
; P8BE: lfiwzx f0, 0, r3
; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
; P8BE: xxsldwi vs0, v2, vs0, 1
; P8BE: xxsldwi v2, vs0, vs0, 3
; P8BE-NEXT: blr
entry:
%0 = load i32, i32* %int32, align 4
%vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
ret <4 x i32> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads the i32 at constant index 1 of %int32 and inserts it into element 0
; of the <4 x i32> %vec. Only the two pwr8 prefixes carry expectations here.
define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec) {
; P8LE-LABEL: s2v_test2:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: addis r4, r2, .LCPI1_0@toc@ha
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: addi r3, r4, .LCPI1_0@toc@l
; P8LE-NEXT: lvx v4, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: vperm v2, v3, v2, v4
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test2:
; P8BE: # %bb.0: # %entry
; P8BE: addi r3, r3, 4
; P8BE: lfiwzx f0, 0, r3
; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
; P8BE: xxsldwi vs0, v2, vs0, 1
; P8BE: xxsldwi v2, vs0, vs0, 3
; P8BE-NEXT: blr
entry:
%arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
%0 = load i32, i32* %arrayidx, align 4
%vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
ret <4 x i32> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads the i32 at the runtime index %Idx (sign-extended to i64) of %int32 and
; inserts it into element 0 of the <4 x i32> %vec. Only the two pwr8 prefixes
; carry expectations here.
define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32 signext %Idx) {
; P8LE-LABEL: s2v_test3:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: sldi r5, r7, 2
; P8LE-NEXT: addis r4, r2, .LCPI2_0@toc@ha
; P8LE-NEXT: lfiwzx f0, r3, r5
; P8LE-NEXT: addi r3, r4, .LCPI2_0@toc@l
; P8LE-NEXT: lvx v4, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: vperm v2, v3, v2, v4
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test3:
; P8BE: # %bb.0: # %entry
; P8BE: sldi r4, r7, 2
; P8BE: lfiwzx f0, r3, r4
; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
; P8BE: xxsldwi vs0, v2, vs0, 1
; P8BE: xxsldwi v2, vs0, vs0, 3
; P8BE-NEXT: blr
entry:
%idxprom = sext i32 %Idx to i64
%arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom
%0 = load i32, i32* %arrayidx, align 4
%vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
ret <4 x i32> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads the i32 at constant index 1 of %int32 and inserts it into element 0
; of the <4 x i32> %vec. The IR body is the same as @s2v_test2 in this file.
define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec) {
; P8LE-LABEL: s2v_test4:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: addis r4, r2, .LCPI3_0@toc@ha
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: addi r3, r4, .LCPI3_0@toc@l
; P8LE-NEXT: lvx v4, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: vperm v2, v3, v2, v4
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test4:
; P8BE: # %bb.0: # %entry
; P8BE: addi r3, r3, 4
; P8BE: lfiwzx f0, 0, r3
; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
; P8BE: xxsldwi vs0, v2, vs0, 1
; P8BE: xxsldwi v2, vs0, vs0, 3
; P8BE-NEXT: blr
entry:
%arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
%0 = load i32, i32* %arrayidx, align 4
%vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
ret <4 x i32> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads an i32 from %ptr1 and inserts it into element 0 of the <4 x i32> %vec.
; The vector is the first argument and the pointer the second; the expected
; code addresses the pointer through r5.
define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1) {
; P8LE-LABEL: s2v_test5:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwzx f0, 0, r5
; P8LE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
; P8LE-NEXT: addi r3, r3, .LCPI4_0@toc@l
; P8LE-NEXT: lvx v4, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: vperm v2, v3, v2, v4
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test5:
; P8BE: # %bb.0: # %entry
; P8BE: lfiwzx f0, 0, r5
; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
; P8BE: xxsldwi vs0, v2, vs0, 1
; P8BE: xxsldwi v2, vs0, vs0, 3
; P8BE-NEXT: blr
entry:
%0 = load i32, i32* %ptr1, align 4
%vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
ret <4 x i32> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads a float from %f64 and inserts it into element 0 of the <4 x float>
; %vec. Despite its name, %f64 is a float pointer in this function.
define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec) {
; P8LE-LABEL: s2v_test_f1:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: addis r4, r2, .LCPI5_0@toc@ha
; P8LE-NEXT: addi r3, r4, .LCPI5_0@toc@l
; P8LE-NEXT: lvx v4, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: vperm v2, v3, v2, v4
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f1:
; P8BE: # %bb.0: # %entry
; P8BE: lfiwzx f0, 0, r3
; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
; P8BE: xxsldwi vs0, v2, vs0, 1
; P8BE: xxsldwi v2, vs0, vs0, 3
; P8BE-NEXT: blr
entry:
%0 = load float, float* %f64, align 4
%vecins = insertelement <4 x float> %vec, float %0, i32 0
ret <4 x float> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads the float at constant index 1 of %f64 (a float pointer, despite the
; name) and inserts it into element 0 of the <2 x float> %vec.
; NOTE(review): the float load below is annotated align 8 although the element
; is 4 bytes wide; presumably carried over from a double-based variant of this
; test - confirm it is intended.
define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec) {
; P9LE-LABEL: s2v_test_f2:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: addi r3, r3, 4
; P9LE-NEXT: xxspltw v2, v2, 2
; P9LE-NEXT: lfiwzx f0, 0, r3
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f2:
; P9BE: # %bb.0: # %entry
; P9BE: addi r3, r3, 4
; P9BE: xxspltw v2, v2, 1
; P9BE: lfiwzx f0, 0, r3
; P9BE-NEXT: xxsldwi v3, f0, f0, 1
; P9BE: vmrghw v2, v3, v2
; P9BE-NEXT: blr
; P8LE-LABEL: s2v_test_f2:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: xxspltw v2, v2, 2
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f2:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: addi r3, r3, 4
; P8BE-NEXT: xxspltw v2, v2, 1
; P8BE-NEXT: lfiwzx f0, 0, r3
; P8BE-NEXT: xxsldwi v3, f0, f0, 1
; P8BE-NEXT: vmrghw v2, v3, v2
; P8BE-NEXT: blr
entry:
%arrayidx = getelementptr inbounds float, float* %f64, i64 1
%0 = load float, float* %arrayidx, align 8
%vecins = insertelement <2 x float> %vec, float %0, i32 0
ret <2 x float> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads the float at the runtime index %Idx (sign-extended to i64) of %f64
; (a float pointer, despite the name) and inserts it into element 0 of the
; <2 x float> %vec.
; NOTE(review): the float load below is annotated align 8 although the element
; is 4 bytes wide and the index is variable; presumably carried over from a
; double-based variant of this test - confirm it is intended.
define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec, i32 signext %Idx) {
; P9LE-LABEL: s2v_test_f3:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: sldi r4, r7, 2
; P9LE-NEXT: xxspltw v2, v2, 2
; P9LE-NEXT: lfiwzx f0, r3, r4
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f3:
; P9BE: # %bb.0: # %entry
; P9BE: sldi r4, r7, 2
; P9BE: xxspltw v2, v2, 1
; P9BE: lfiwzx f0, r3, r4
; P9BE-NEXT: xxsldwi v3, f0, f0, 1
; P9BE: vmrghw v2, v3, v2
; P9BE-NEXT: blr
; P8LE-LABEL: s2v_test_f3:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: sldi r4, r7, 2
; P8LE-NEXT: xxspltw v2, v2, 2
; P8LE-NEXT: lfiwzx f0, r3, r4
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f3:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: sldi r4, r7, 2
; P8BE-NEXT: xxspltw v2, v2, 1
; P8BE-NEXT: lfiwzx f0, r3, r4
; P8BE-NEXT: xxsldwi v3, f0, f0, 1
; P8BE-NEXT: vmrghw v2, v3, v2
; P8BE-NEXT: blr
entry:
%idxprom = sext i32 %Idx to i64
%arrayidx = getelementptr inbounds float, float* %f64, i64 %idxprom
%0 = load float, float* %arrayidx, align 8
%vecins = insertelement <2 x float> %vec, float %0, i32 0
ret <2 x float> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads the float at constant index 1 of %f64 (a float pointer, despite the
; name) and inserts it into element 0 of the <2 x float> %vec. The IR body is
; the same as @s2v_test_f2 in this file.
; NOTE(review): the float load below is annotated align 8 although the element
; is 4 bytes wide; presumably carried over from a double-based variant of this
; test - confirm it is intended.
define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec) {
; P9LE-LABEL: s2v_test_f4:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: addi r3, r3, 4
; P9LE-NEXT: xxspltw v2, v2, 2
; P9LE-NEXT: lfiwzx f0, 0, r3
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f4:
; P9BE: # %bb.0: # %entry
; P9BE: addi r3, r3, 4
; P9BE: xxspltw v2, v2, 1
; P9BE: lfiwzx f0, 0, r3
; P9BE-NEXT: xxsldwi v3, f0, f0, 1
; P9BE: vmrghw v2, v3, v2
; P9BE-NEXT: blr
; P8LE-LABEL: s2v_test_f4:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: addi r3, r3, 4
; P8LE-NEXT: xxspltw v2, v2, 2
; P8LE-NEXT: lfiwzx f0, 0, r3
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f4:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: addi r3, r3, 4
; P8BE-NEXT: xxspltw v2, v2, 1
; P8BE-NEXT: lfiwzx f0, 0, r3
; P8BE-NEXT: xxsldwi v3, f0, f0, 1
; P8BE-NEXT: vmrghw v2, v3, v2
; P8BE-NEXT: blr
entry:
%arrayidx = getelementptr inbounds float, float* %f64, i64 1
%0 = load float, float* %arrayidx, align 8
%vecins = insertelement <2 x float> %vec, float %0, i32 0
ret <2 x float> %vecins
}
; Function Attrs: norecurse nounwind readonly
; Loads a float from %ptr1 and inserts it into element 0 of the <2 x float>
; %vec. The vector is the first argument and the pointer the second; the
; expected code addresses the pointer through r5.
; NOTE(review): the float load below is annotated align 8 although the element
; is 4 bytes wide; presumably carried over from a double-based variant of this
; test - confirm it is intended.
define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr1) {
; P9LE-LABEL: s2v_test_f5:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: lfiwzx f0, 0, r5
; P9LE-NEXT: xxspltw v2, v2, 2
; P9LE-NEXT: xxpermdi v3, f0, f0, 2
; P9LE-NEXT: vmrglw v2, v2, v3
; P9LE-NEXT: blr
; P9BE-LABEL: s2v_test_f5:
; P9BE: # %bb.0: # %entry
; P9BE: lfiwzx f0, 0, r5
; P9BE: xxspltw v2, v2, 1
; P9BE-NEXT: xxsldwi v3, f0, f0, 1
; P9BE: vmrghw v2, v3, v2
; P9BE-NEXT: blr
; P8LE-LABEL: s2v_test_f5:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: lfiwzx f0, 0, r5
; P8LE-NEXT: xxspltw v2, v2, 2
; P8LE-NEXT: xxpermdi v3, f0, f0, 2
; P8LE-NEXT: vmrglw v2, v2, v3
; P8LE-NEXT: blr
; P8BE-LABEL: s2v_test_f5:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: lfiwzx f0, 0, r5
; P8BE-NEXT: xxspltw v2, v2, 1
; P8BE-NEXT: xxsldwi v3, f0, f0, 1
; P8BE-NEXT: vmrghw v2, v3, v2
; P8BE-NEXT: blr
entry:
%0 = load float, float* %ptr1, align 8
%vecins = insertelement <2 x float> %vec, float %0, i32 0
ret <2 x float> %vecins
}

View File

@ -1,12 +1,15 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr8 \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-vsr-nums-as-vr \
; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -O3 < %s | FileCheck %s
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-P9 \
; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
; RUN: < %s | FileCheck %s --check-prefix=CHECK-P9 \
; RUN: --implicit-check-not xxswapd
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \
; RUN: -verify-machineinstrs -mattr=-power9-vector < %s | FileCheck %s
; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
; RUN: -mattr=-power9-vector < %s | FileCheck %s
; These tests verify that VSX swap optimization works when loading a scalar
; into a vector register.
@ -17,6 +20,31 @@
@y = global double 1.780000e+00, align 8
define void @bar0() {
; CHECK-LABEL: bar0:
; CHECK: # %bb.0: # %entry
; CHECK: addis r3, r2, .LC0@toc@ha
; CHECK: addis r4, r2, .LC1@toc@ha
; CHECK: ld r3, .LC0@toc@l(r3)
; CHECK: addis r3, r2, .LC2@toc@ha
; CHECK: ld r3, .LC2@toc@l(r3)
; CHECK: xxpermdi vs0, vs0, vs1, 1
; CHECK: stxvd2x vs0, 0, r3
; CHECK: blr
;
; CHECK-P9-LABEL: bar0:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9: addis r3, r2, .LC0@toc@ha
; CHECK-P9: addis r4, r2, .LC1@toc@ha
; CHECK-P9: ld r3, .LC0@toc@l(r3)
; CHECK-P9: ld r4, .LC1@toc@l(r4)
; CHECK-P9: lfd f0, 0(r3)
; CHECK-P9: lxvx vs1, 0, r4
; CHECK-P9: addis r3, r2, .LC2@toc@ha
; CHECK-P9: ld r3, .LC2@toc@l(r3)
; CHECK-P9: xxpermdi vs0, f0, f0, 2
; CHECK-P9: xxpermdi vs0, vs1, vs0, 1
; CHECK-P9: stxvx vs0, 0, r3
; CHECK-P9: blr
entry:
%0 = load <2 x double>, <2 x double>* @x, align 16
%1 = load double, double* @y, align 8
@ -25,21 +53,32 @@ entry:
ret void
}
; CHECK-LABEL: @bar0
; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
; CHECK-DAG: lfdx [[REG2:[0-9]+]]
; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1
; CHECK: stxvd2x [[REG5]]
; CHECK-P9-LABEL: @bar0
; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3)
; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
; CHECK-P9: xxpermdi [[REG5:[0-9]+]], [[REG1]], [[REG4]], 1
; CHECK-P9: stxvx [[REG5]]
define void @bar1() {
; CHECK-LABEL: bar1:
; CHECK: # %bb.0: # %entry
; CHECK: addis r3, r2, .LC0@toc@ha
; CHECK: addis r4, r2, .LC1@toc@ha
; CHECK: ld r3, .LC0@toc@l(r3)
; CHECK: addis r3, r2, .LC2@toc@ha
; CHECK: ld r3, .LC2@toc@l(r3)
; CHECK: xxmrghd vs0, vs1, vs0
; CHECK: stxvd2x vs0, 0, r3
; CHECK: blr
;
; CHECK-P9-LABEL: bar1:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9: addis r3, r2, .LC0@toc@ha
; CHECK-P9: addis r4, r2, .LC1@toc@ha
; CHECK-P9: ld r3, .LC0@toc@l(r3)
; CHECK-P9: ld r4, .LC1@toc@l(r4)
; CHECK-P9: lfd f0, 0(r3)
; CHECK-P9: lxvx vs1, 0, r4
; CHECK-P9: addis r3, r2, .LC2@toc@ha
; CHECK-P9: ld r3, .LC2@toc@l(r3)
; CHECK-P9: xxpermdi vs0, f0, f0, 2
; CHECK-P9: xxmrgld vs0, vs0, vs1
; CHECK-P9: stxvx vs0, 0, r3
; CHECK-P9: blr
entry:
%0 = load <2 x double>, <2 x double>* @x, align 16
%1 = load double, double* @y, align 8
@ -48,17 +87,3 @@ entry:
ret void
}
; CHECK-LABEL: @bar1
; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
; CHECK-DAG: lfdx [[REG2:[0-9]+]]
; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]]
; CHECK: stxvd2x [[REG5]]
; CHECK-P9-LABEL: @bar1
; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3)
; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
; CHECK-P9: xxmrgld [[REG5:[0-9]+]], [[REG4]], [[REG1]]
; CHECK-P9: stxvx [[REG5]]

View File

@ -1,74 +1,125 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -ppc-vsr-nums-as-vr \
; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \
; RUN: | FileCheck %s
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector -ppc-vsr-nums-as-vr \
; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \
; RUN: | FileCheck --check-prefix=CHECK-P9-VECTOR %s
; RUN: llc -verify-machineinstrs -mcpu=pwr9 \
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \
; RUN: --check-prefix=CHECK-P9 --implicit-check-not xxswapd
; Loads a <2 x double> from %p1 and a double from %p2, then returns the vector
; with the scalar inserted at element 0.
define <2 x double> @testi0(<2 x double>* %p1, double* %p2) {
; CHECK-LABEL: testi0:
; CHECK: # %bb.0:
; CHECK-NEXT: lxvd2x vs0, 0, r3
; CHECK-NEXT: lfdx f1, 0, r4
; CHECK-NEXT: xxswapd vs0, vs0
; CHECK-NEXT: xxspltd vs1, vs1, 0
; CHECK-NEXT: xxpermdi v2, vs0, vs1, 1
; CHECK-NEXT: blr
;
; CHECK-P9-VECTOR-LABEL: testi0:
; CHECK-P9-VECTOR: # %bb.0:
; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3
; CHECK-P9-VECTOR-NEXT: lfdx f1, 0, r4
; CHECK-P9-VECTOR-NEXT: xxspltd vs1, vs1, 0
; CHECK-P9-VECTOR-NEXT: xxswapd vs0, vs0
; CHECK-P9-VECTOR-NEXT: xxpermdi v2, vs0, vs1, 1
; CHECK-P9-VECTOR-NEXT: blr
;
; CHECK-P9-LABEL: testi0:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: lfd f0, 0(r4)
; CHECK-P9-NEXT: lxv vs1, 0(r3)
; CHECK-P9-NEXT: xxpermdi vs0, f0, f0, 2
; CHECK-P9-NEXT: xxpermdi v2, vs1, vs0, 1
; CHECK-P9-NEXT: blr
%v = load <2 x double>, <2 x double>* %p1
%s = load double, double* %p2
%r = insertelement <2 x double> %v, double %s, i32 0
ret <2 x double> %r
; CHECK-LABEL: testi0
; CHECK: lxvd2x 0, 0, 3
; CHECK: lfdx 1, 0, 4
; CHECK-DAG: xxspltd 1, 1, 0
; CHECK-DAG: xxswapd 0, 0
; CHECK: xxpermdi 34, 0, 1, 1
; CHECK-P9-LABEL: testi0
; CHECK-P9: lfd [[REG1:[0-9]+]], 0(4)
; CHECK-P9: lxv [[REG2:[0-9]+]], 0(3)
; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0
; CHECK-P9: xxpermdi 34, [[REG2]], [[REG3]], 1
}
; Loads a <2 x double> from %p1 and a double from %p2, then returns the vector
; with the scalar inserted at element 1.
define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
; CHECK-LABEL: testi1:
; CHECK: # %bb.0:
; CHECK-NEXT: lxvd2x vs0, 0, r3
; CHECK-NEXT: lfdx f1, 0, r4
; CHECK-NEXT: xxswapd vs0, vs0
; CHECK-NEXT: xxspltd vs1, vs1, 0
; CHECK-NEXT: xxmrgld v2, vs1, vs0
; CHECK-NEXT: blr
;
; CHECK-P9-VECTOR-LABEL: testi1:
; CHECK-P9-VECTOR: # %bb.0:
; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3
; CHECK-P9-VECTOR-NEXT: lfdx f1, 0, r4
; CHECK-P9-VECTOR-NEXT: xxspltd vs1, vs1, 0
; CHECK-P9-VECTOR-NEXT: xxswapd vs0, vs0
; CHECK-P9-VECTOR-NEXT: xxmrgld v2, vs1, vs0
; CHECK-P9-VECTOR-NEXT: blr
;
; CHECK-P9-LABEL: testi1:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: lfd f0, 0(r4)
; CHECK-P9-NEXT: lxv vs1, 0(r3)
; CHECK-P9-NEXT: xxpermdi vs0, f0, f0, 2
; CHECK-P9-NEXT: xxmrgld v2, vs0, vs1
; CHECK-P9-NEXT: blr
%v = load <2 x double>, <2 x double>* %p1
%s = load double, double* %p2
%r = insertelement <2 x double> %v, double %s, i32 1
ret <2 x double> %r
; CHECK-LABEL: testi1
; CHECK: lxvd2x 0, 0, 3
; CHECK: lfdx 1, 0, 4
; CHECK-DAG: xxspltd 1, 1, 0
; CHECK-DAG: xxswapd 0, 0
; CHECK: xxmrgld 34, 1, 0
; CHECK-P9-LABEL: testi1
; CHECK-P9: lfd [[REG1:[0-9]+]], 0(4)
; CHECK-P9: lxv [[REG2:[0-9]+]], 0(3)
; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0
; CHECK-P9: xxmrgld 34, [[REG3]], [[REG2]]
}
; Loads a <2 x double> from %p1 and returns its element 0 as a scalar double.
define double @teste0(<2 x double>* %p1) {
; CHECK-LABEL: teste0:
; CHECK: # %bb.0:
; CHECK-NEXT: lxvd2x vs1, 0, r3
; CHECK: blr
;
; CHECK-P9-VECTOR-LABEL: teste0:
; CHECK-P9-VECTOR: # %bb.0:
; CHECK-P9-VECTOR-NEXT: lxvd2x vs1, 0, r3
; CHECK-P9-VECTOR: blr
;
; CHECK-P9-LABEL: teste0:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: lfd f1, 0(r3)
; CHECK-P9-NEXT: blr
%v = load <2 x double>, <2 x double>* %p1
%r = extractelement <2 x double> %v, i32 0
ret double %r
; CHECK-LABEL: teste0
; CHECK: lxvd2x 1, 0, 3
; CHECK-P9-LABEL: teste0
; CHECK-P9: lfd 1, 0(3)
}
; Loads a <2 x double> from %p1 and returns its element 1 as a scalar double.
define double @teste1(<2 x double>* %p1) {
; CHECK-LABEL: teste1:
; CHECK: # %bb.0:
; CHECK-NEXT: lxvd2x vs0, 0, r3
; CHECK-NEXT: xxswapd vs1, vs0
; CHECK: blr
;
; CHECK-P9-VECTOR-LABEL: teste1:
; CHECK-P9-VECTOR: # %bb.0:
; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3
; CHECK-P9-VECTOR-NEXT: xxswapd vs1, vs0
; CHECK-P9-VECTOR: blr
;
; CHECK-P9-LABEL: teste1:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: lfd f1, 8(r3)
; CHECK-P9-NEXT: blr
%v = load <2 x double>, <2 x double>* %p1
%r = extractelement <2 x double> %v, i32 1
ret double %r
; CHECK-LABEL: teste1
; CHECK: lxvd2x 0, 0, 3
; CHECK: xxswapd 1, 0
; CHECK-P9-LABEL: teste1
; CHECK-P9: lfd 1, 8(3)
}

View File

@ -1,13 +1,14 @@
; RUN: llc -mtriple=x86_64-windows-itanium < %s | FileCheck %s
; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck %s
; RUN: llc -mtriple=x86_64-w64-windows-gnu < %s | FileCheck %s --check-prefix=GNU
; RUN: llc -mtriple=i686-w64-windows-gnu < %s | FileCheck %s --check-prefix=GNU32
; RUN: llc -mtriple=x86_64-w64-windows-gnu < %s -filetype=obj | llvm-objdump - -headers | FileCheck %s --check-prefix=GNUOBJ
; RUN: llc -function-sections -mtriple=x86_64-windows-itanium < %s | FileCheck %s
; RUN: llc -function-sections -mtriple=x86_64-windows-msvc < %s | FileCheck %s
; RUN: llc -function-sections -mtriple=x86_64-w64-windows-gnu < %s | FileCheck %s --check-prefix=GNU
; RUN: llc -function-sections -mtriple=i686-w64-windows-gnu < %s | FileCheck %s --check-prefix=GNU32
; RUN: llc -function-sections -mtriple=x86_64-w64-windows-gnu < %s -filetype=obj | llvm-objdump - -headers | FileCheck %s --check-prefix=GNUOBJ
; GCC and MSVC handle comdats completely differently. Make sure we do the right
; thing for each.
; Generated with this C++ source:
; Modeled on this C++ source, with additional modifications for
; -ffunction-sections:
; int bar(int);
; __declspec(selectany) int gv = 42;
; inline int foo(int x) { return bar(x) + gv; }
@ -26,8 +27,24 @@ entry:
ret i32 %call
}
; CHECK: .section .text,"xr",one_only,main
; CHECK: main:
; GNU: .section .text$main,"xr",one_only,main
; GNU: main:
; GNU32: .section .text$main,"xr",one_only,_main
; GNU32: _main:
; Returns %x + %y using the x86_fastcallcc calling convention. The i686 GNU
; expectations that follow this function look for the decorated symbol
; @fastcall@8, while the section suffix stays the plain name "fastcall".
define dso_local x86_fastcallcc i32 @fastcall(i32 %x, i32 %y) {
%rv = add i32 %x, %y
ret i32 %rv
}
; CHECK: .section .text,"xr",one_only,fastcall
; CHECK: fastcall:
; GNU: .section .text$fastcall,"xr",one_only,fastcall
; GNU: fastcall:
; GNU32: .section .text$fastcall,"xr",one_only,@fastcall@8
; GNU32: @fastcall@8:
; Function Attrs: inlinehint uwtable
define linkonce_odr dso_local i32 @_Z3fooi(i32 %x) #1 comdat {
@ -50,9 +67,9 @@ entry:
; GNU: gv:
; GNU: .long 42
; GNU32: .section .text$__Z3fooi,"xr",discard,__Z3fooi
; GNU32: .section .text$_Z3fooi,"xr",discard,__Z3fooi
; GNU32: __Z3fooi:
; GNU32: .section .data$_gv,"dw",discard,_gv
; GNU32: .section .data$gv,"dw",discard,_gv
; GNU32: _gv:
; GNU32: .long 42

View File

@ -0,0 +1,38 @@
; RUN: llc -mtriple mips-unknown-linux-gnu -mattr=+micromips -O3 -filetype=obj -o - %s | llvm-readelf -r | FileCheck %s
; CHECK: .rel.eh_frame
; CHECK: DW.ref.__gxx_personality_v0
; CHECK-NEXT: .text
; CHECK-NEXT: .gcc_except_table
; External object whose address @main passes as the second argument of
; __cxa_throw.
@_ZTIi = external constant i8*
; Throws and immediately catches an exception so that the function needs a
; personality routine (@__gxx_personality_v0) and a landing pad; the CHECK
; lines at the top of the file inspect the .rel.eh_frame relocations that
; result from that.
define dso_local i32 @main() local_unnamed_addr personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry:
; Allocate a 4-byte exception object and store the value 5 into it.
%exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind
%0 = bitcast i8* %exception.i to i32*
store i32 5, i32* %0, align 16
; Throw it. The call is an invoke, so unwinding transfers control to %return.
invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (i8** @_ZTIi to i8*), i8* null) noreturn
to label %.noexc unwind label %return
.noexc:
; Normal destination of the invoke; the callee is marked noreturn.
unreachable
return:
; Landing pad with a single "catch i8* null" clause.
%1 = landingpad { i8*, i32 }
catch i8* null
; Element 0 of the landingpad result is handed to __cxa_begin_catch.
%2 = extractvalue { i8*, i32 } %1, 0
%3 = tail call i8* @__cxa_begin_catch(i8* %2) nounwind
tail call void @__cxa_end_catch()
ret i32 0
}
; Exception-handling runtime functions referenced by @main. They are only
; declared here; no definitions are provided in this module.
declare i32 @__gxx_personality_v0(...)
declare i8* @__cxa_begin_catch(i8*) local_unnamed_addr
declare void @__cxa_end_catch() local_unnamed_addr
declare i8* @__cxa_allocate_exception(i32) local_unnamed_addr
declare void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr

View File

@ -0,0 +1,69 @@
; RUN: opt -S -lcssa < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Reproducer for PR39019.
;
; Verify that the llvm.dbg.value in the %for.cond.cleanup2 block is rewritten
; to use the PHI node for %add that is created by LCSSA.
; CHECK-LABEL: for.cond.cleanup2:
; CHECK-NEXT: [[PN:%[^ ]*]] = phi i32 [ %add.lcssa, %for.cond.cleanup1 ]
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[PN]], metadata [[VAR:![0-9]+]], metadata !DIExpression())
; CHECK-NEXT: call void @bar(i32 [[PN]])
; CHECK-LABEL: for.body:
; CHECK: %add = add nsw i32 0, 2
; CHECK: call void @llvm.dbg.value(metadata i32 %add, metadata [[VAR]], metadata !DIExpression())
; CHECK: [[VAR]] = !DILocalVariable(name: "sum",
; Function Attrs: nounwind
; Reproducer body: %add is defined in %for.body but is also used, both by a
; call to @bar and by an llvm.dbg.value, in %for.cond.cleanup2. All
; conditional branches use the constant condition "false". Value and block
; names are matched literally by the CHECK lines above, so they must not be
; renamed.
define void @foo() #0 !dbg !6 {
entry:
br label %for.cond.preheader, !dbg !12
for.cond.preheader: ; preds = %for.cond.cleanup1, %entry
br label %for.body, !dbg !12
for.cond.cleanup2: ; preds = %for.cond.cleanup1
; Uses of %add outside the block that defines it; the CHECK lines expect both
; the dbg.value and the call to be rewritten to use a PHI node.
call void @llvm.dbg.value(metadata i32 %add, metadata !9, metadata !DIExpression()), !dbg !12
tail call void @bar(i32 %add) #0, !dbg !12
ret void, !dbg !12
for.cond.cleanup1: ; preds = %for.body
br i1 false, label %for.cond.preheader, label %for.cond.cleanup2, !dbg !12
for.body: ; preds = %for.body, %for.cond.preheader
; Definition of %add and the dbg.value in the same block; the CHECK lines
; expect this dbg.value to keep referring to %add directly.
%add = add nsw i32 0, 2, !dbg !12
call void @llvm.dbg.value(metadata i32 %add, metadata !9, metadata !DIExpression()), !dbg !12
br i1 false, label %for.body, label %for.cond.cleanup1, !dbg !12
}
; External function called from %for.cond.cleanup2 in @foo with %add as its
; argument.
; Function Attrs: nounwind
declare void @bar(i32) #0
; Debug-value intrinsic used in @foo to describe variable !9.
; Function Attrs: nounwind readnone speculatable
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
; Attribute groups referenced as #0 and #1 above.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }
; Debug-info metadata for the module. Nodes of note:
;   !6  - DISubprogram attached to @foo.
;   !9  - DILocalVariable "sum", the variable described by both llvm.dbg.value
;         calls in @foo and captured as [[VAR]] by the CHECK lines.
;   !12 - the single DILocation attached to every instruction in @foo.
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
!llvm.ident = !{!5}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !2)
!1 = !DIFile(filename: "foo.c", directory: "/")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{!"clang version 8.0.0"}
!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 10, type: !7, isLocal: false, isDefinition: true, scopeLine: 10, isOptimized: true, unit: !0, retainedNodes: !8)
!7 = !DISubroutineType(types: !2)
!8 = !{!9}
!9 = !DILocalVariable(name: "sum", scope: !10, file: !1, line: 11, type: !11)
!10 = !DILexicalBlockFile(scope: !6, file: !1, discriminator: 0)
!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!12 = !DILocation(line: 0, scope: !10)

View File

@ -1,12 +1,16 @@
# Collect the names of the enabled exegesis targets in a local variable first.
# set(... PARENT_SCOPE) does not change the value visible in this scope, so
# appending to LLVM_EXEGESIS_TARGETS once per target would make every write
# start from the same unmodified local value and clobber the previous one in
# the parent scope. The list is therefore published exactly once, below.
set(TARGETS_TO_APPEND "")
if (LLVM_TARGETS_TO_BUILD MATCHES "X86")
  add_subdirectory(X86)
  set(TARGETS_TO_APPEND "${TARGETS_TO_APPEND} X86")
endif()
if (LLVM_TARGETS_TO_BUILD MATCHES "AArch64")
  add_subdirectory(AArch64)
  set(TARGETS_TO_APPEND "${TARGETS_TO_APPEND} AArch64")
endif()
# Export the combined, space-separated list to the parent scope.
set(LLVM_EXEGESIS_TARGETS "${LLVM_EXEGESIS_TARGETS} ${TARGETS_TO_APPEND}" PARENT_SCOPE)
add_library(LLVMExegesis
STATIC
Analysis.cpp