Vendor import of llvm trunk r321426:
https://llvm.org/svn/llvm-project/llvm/trunk@321426
This commit is contained in:
parent
c7dac04c34
commit
fd4675b5a0
@ -3987,6 +3987,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
|
||||
// reassociate and
|
||||
if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
|
||||
return RAND;
|
||||
|
||||
// Try to convert a constant mask AND into a shuffle clear mask.
|
||||
if (VT.isVector())
|
||||
if (SDValue Shuffle = XformToShuffleWithZero(N))
|
||||
return Shuffle;
|
||||
|
||||
// fold (and (or x, C), D) -> D if (C & D) == D
|
||||
auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
|
||||
return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
|
||||
@ -16480,6 +16486,8 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
|
||||
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
|
||||
/// vector_shuffle V, Zero, <0, 4, 2, 4>
|
||||
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
|
||||
assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
SDValue LHS = N->getOperand(0);
|
||||
SDValue RHS = peekThroughBitcast(N->getOperand(1));
|
||||
@ -16490,9 +16498,6 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
|
||||
if (LegalOperations)
|
||||
return SDValue();
|
||||
|
||||
if (N->getOpcode() != ISD::AND)
|
||||
return SDValue();
|
||||
|
||||
if (RHS.getOpcode() != ISD::BUILD_VECTOR)
|
||||
return SDValue();
|
||||
|
||||
@ -16581,10 +16586,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
|
||||
N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
|
||||
return Fold;
|
||||
|
||||
// Try to convert a constant mask AND into a shuffle clear mask.
|
||||
if (SDValue Shuffle = XformToShuffleWithZero(N))
|
||||
return Shuffle;
|
||||
|
||||
// Type legalization might introduce new shuffles in the DAG.
|
||||
// Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
|
||||
// -> (shuffle (VBinOp (A, B)), Undef, Mask).
|
||||
|
@ -1086,7 +1086,7 @@ bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc Loc,
|
||||
return false;
|
||||
}
|
||||
|
||||
Triple::OSType getOSTypeFromPlatform(MachO::PlatformType Type) {
|
||||
static Triple::OSType getOSTypeFromPlatform(MachO::PlatformType Type) {
|
||||
switch (Type) {
|
||||
case MachO::PLATFORM_MACOS: return Triple::MacOSX;
|
||||
case MachO::PLATFORM_IOS: return Triple::IOS;
|
||||
|
@ -423,8 +423,12 @@ bool ELFAsmParser::parseGroup(StringRef &GroupName) {
|
||||
if (L.isNot(AsmToken::Comma))
|
||||
return TokError("expected group name");
|
||||
Lex();
|
||||
if (getParser().parseIdentifier(GroupName))
|
||||
if (L.is(AsmToken::Integer)) {
|
||||
GroupName = getTok().getString();
|
||||
Lex();
|
||||
} else if (getParser().parseIdentifier(GroupName)) {
|
||||
return true;
|
||||
}
|
||||
if (L.is(AsmToken::Comma)) {
|
||||
Lex();
|
||||
StringRef Linkage;
|
||||
|
@ -27,6 +27,8 @@
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Implementation of permutation networks.
|
||||
|
||||
@ -147,6 +149,7 @@ struct Coloring {
|
||||
void build();
|
||||
bool color();
|
||||
};
|
||||
} // namespace
|
||||
|
||||
std::pair<bool,uint8_t> Coloring::getUniqueColor(const NodeSet &Nodes) {
|
||||
uint8_t Color = None;
|
||||
@ -300,6 +303,7 @@ void Coloring::dump() const {
|
||||
dbgs() << " }\n}\n";
|
||||
}
|
||||
|
||||
namespace {
|
||||
// Base class of for reordering networks. They don't strictly need to be
|
||||
// permutations, as outputs with repeated occurrences of an input element
|
||||
// are allowed.
|
||||
@ -408,7 +412,7 @@ struct BenesNetwork : public PermNetwork {
|
||||
private:
|
||||
bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
bool ForwardDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
|
||||
unsigned Step) {
|
||||
@ -602,6 +606,7 @@ bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size,
|
||||
// Support for building selection results (output instructions that are
|
||||
// parts of the final selection).
|
||||
|
||||
namespace {
|
||||
struct OpRef {
|
||||
OpRef(SDValue V) : OpV(V) {}
|
||||
bool isValue() const { return OpV.getNode() != nullptr; }
|
||||
@ -689,6 +694,7 @@ struct ResultStack {
|
||||
|
||||
void print(raw_ostream &OS, const SelectionDAG &G) const;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const {
|
||||
if (isValue()) {
|
||||
@ -740,6 +746,7 @@ void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const {
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
struct ShuffleMask {
|
||||
ShuffleMask(ArrayRef<int> M) : Mask(M) {
|
||||
for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
|
||||
@ -763,6 +770,7 @@ struct ShuffleMask {
|
||||
return ShuffleMask(Mask.take_back(H));
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// The HvxSelector class.
|
||||
|
@ -15543,7 +15543,6 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
SDLoc dl(Op);
|
||||
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
if (SrcVT.isVector()) {
|
||||
if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
|
||||
return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
|
||||
@ -15551,9 +15550,15 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
|
||||
DAG.getUNDEF(SrcVT)));
|
||||
}
|
||||
if (SrcVT.getVectorElementType() == MVT::i1) {
|
||||
if (SrcVT == MVT::v2i1 && TLI.isTypeLegal(SrcVT))
|
||||
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
|
||||
DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v2i64, Src));
|
||||
if (SrcVT == MVT::v2i1) {
|
||||
// For v2i1, we need to widen to v4i1 first.
|
||||
assert(VT == MVT::v2f64 && "Unexpected type");
|
||||
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Src,
|
||||
DAG.getUNDEF(MVT::v2i1));
|
||||
return DAG.getNode(X86ISD::CVTSI2P, dl, Op.getValueType(),
|
||||
DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Src));
|
||||
}
|
||||
|
||||
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
|
||||
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
|
||||
DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src));
|
||||
@ -15903,9 +15908,15 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
|
||||
SDLoc dl(Op);
|
||||
|
||||
if (SrcVT.getVectorElementType() == MVT::i1) {
|
||||
if (SrcVT == MVT::v2i1)
|
||||
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
|
||||
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0));
|
||||
if (SrcVT == MVT::v2i1) {
|
||||
// For v2i1, we need to widen to v4i1 first.
|
||||
assert(Op.getValueType() == MVT::v2f64 && "Unexpected type");
|
||||
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, N0,
|
||||
DAG.getUNDEF(MVT::v2i1));
|
||||
return DAG.getNode(X86ISD::CVTUI2P, dl, MVT::v2f64,
|
||||
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0));
|
||||
}
|
||||
|
||||
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
|
||||
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
|
||||
DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0));
|
||||
@ -33047,10 +33058,8 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
|
||||
// The right side has to be a 'trunc' or a constant vector.
|
||||
bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
|
||||
N1.getOperand(0).getValueType() == VT;
|
||||
ConstantSDNode *RHSConstSplat = nullptr;
|
||||
if (auto *RHSBV = dyn_cast<BuildVectorSDNode>(N1))
|
||||
RHSConstSplat = RHSBV->getConstantSplatNode();
|
||||
if (!RHSTrunc && !RHSConstSplat)
|
||||
if (!RHSTrunc &&
|
||||
!ISD::isBuildVectorOfConstantSDNodes(N1.getNode()))
|
||||
return SDValue();
|
||||
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
@ -33060,13 +33069,10 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
|
||||
|
||||
// Set N0 and N1 to hold the inputs to the new wide operation.
|
||||
N0 = N0->getOperand(0);
|
||||
if (RHSConstSplat) {
|
||||
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT.getVectorElementType(),
|
||||
SDValue(RHSConstSplat, 0));
|
||||
N1 = DAG.getSplatBuildVector(VT, DL, N1);
|
||||
} else if (RHSTrunc) {
|
||||
if (RHSTrunc)
|
||||
N1 = N1->getOperand(0);
|
||||
}
|
||||
else
|
||||
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N1);
|
||||
|
||||
// Generate the wide operation.
|
||||
SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, VT, N0, N1);
|
||||
|
@ -141,6 +141,7 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
|
||||
// These instructions cannot address 80-bit memory.
|
||||
multiclass FPBinary<SDNode OpNode, Format fp, string asmstring,
|
||||
bit Forward = 1> {
|
||||
let mayLoad = 1, hasSideEffects = 1 in {
|
||||
// ST(0) = ST(0) + [mem]
|
||||
def _Fp32m : FpIf32<(outs RFP32:$dst),
|
||||
(ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
|
||||
@ -177,10 +178,8 @@ def _Fp80m64: FpI_<(outs RFP80:$dst),
|
||||
(OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2)))),
|
||||
(set RFP80:$dst,
|
||||
(OpNode (f80 (extloadf64 addr:$src2)), RFP80:$src1)))]>;
|
||||
let mayLoad = 1 in
|
||||
def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
|
||||
!strconcat("f", asmstring, "{s}\t$src")>;
|
||||
let mayLoad = 1 in
|
||||
def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
|
||||
!strconcat("f", asmstring, "{l}\t$src")>;
|
||||
// ST(0) = ST(0) + [memint]
|
||||
@ -226,12 +225,11 @@ def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
|
||||
(OpNode RFP80:$src1, (X86fild addr:$src2, i32))),
|
||||
(set RFP80:$dst,
|
||||
(OpNode (X86fild addr:$src2, i32), RFP80:$src1)))]>;
|
||||
let mayLoad = 1 in
|
||||
def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
|
||||
!strconcat("fi", asmstring, "{s}\t$src")>;
|
||||
let mayLoad = 1 in
|
||||
def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
|
||||
!strconcat("fi", asmstring, "{l}\t$src")>;
|
||||
} // mayLoad = 1, hasSideEffects = 1
|
||||
}
|
||||
|
||||
let Defs = [FPSW] in {
|
||||
|
@ -832,9 +832,11 @@ def NoVLX : Predicate<"!Subtarget->hasVLX()">;
|
||||
def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
|
||||
def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
|
||||
def PKU : Predicate<"Subtarget->hasPKU()">;
|
||||
def HasVNNI : Predicate<"Subtarget->hasVNNI()">;
|
||||
def HasVNNI : Predicate<"Subtarget->hasVNNI()">,
|
||||
AssemblerPredicate<"FeatureVNNI", "AVX-512 VNNI ISA">;
|
||||
|
||||
def HasBITALG : Predicate<"Subtarget->hasBITALG()">;
|
||||
def HasBITALG : Predicate<"Subtarget->hasBITALG()">,
|
||||
AssemblerPredicate<"FeatureBITALG", "AVX-512 BITALG ISA">;
|
||||
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
|
||||
def HasAES : Predicate<"Subtarget->hasAES()">;
|
||||
def HasVAES : Predicate<"Subtarget->hasVAES()">;
|
||||
@ -866,7 +868,8 @@ def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
|
||||
def NoBMI2 : Predicate<"!Subtarget->hasBMI2()">;
|
||||
def HasVBMI : Predicate<"Subtarget->hasVBMI()">,
|
||||
AssemblerPredicate<"FeatureVBMI", "AVX-512 VBMI ISA">;
|
||||
def HasVBMI2 : Predicate<"Subtarget->hasVBMI2()">;
|
||||
def HasVBMI2 : Predicate<"Subtarget->hasVBMI2()">,
|
||||
AssemblerPredicate<"FeatureVBMI2", "AVX-512 VBMI2 ISA">;
|
||||
def HasIFMA : Predicate<"Subtarget->hasIFMA()">,
|
||||
AssemblerPredicate<"FeatureIFMA", "AVX-512 IFMA ISA">;
|
||||
def HasRTM : Predicate<"Subtarget->hasRTM()">;
|
||||
|
@ -880,9 +880,10 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
|
||||
/// If we are able to find such sequence, we return the instructions
|
||||
/// we found, namely %casted_phi and the instructions on its use-def chain up
|
||||
/// to the phi (not including the phi).
|
||||
bool getCastsForInductionPHI(
|
||||
PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev,
|
||||
const SCEVAddRecExpr *AR, SmallVectorImpl<Instruction *> &CastInsts) {
|
||||
static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
|
||||
const SCEVUnknown *PhiScev,
|
||||
const SCEVAddRecExpr *AR,
|
||||
SmallVectorImpl<Instruction *> &CastInsts) {
|
||||
|
||||
assert(CastInsts.empty() && "CastInsts is expected to be empty.");
|
||||
auto *PN = cast<PHINode>(PhiScev->getValue());
|
||||
|
@ -228,14 +228,9 @@ define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
|
||||
; KNL-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %ymm0
|
||||
; KNL-NEXT: callq _func8xi1
|
||||
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
|
||||
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
|
||||
; KNL-NEXT: movb $85, %al
|
||||
; KNL-NEXT: kmovw %eax, %k1
|
||||
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 {%k1}
|
||||
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %ymm0
|
||||
; KNL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
|
||||
; KNL-NEXT: popq %rax
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
@ -247,12 +242,9 @@ define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
|
||||
; SKX-NEXT: vpmovm2w %k0, %xmm0
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: callq _func8xi1
|
||||
; SKX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
|
||||
; SKX-NEXT: vpmovw2m %xmm0, %k0
|
||||
; SKX-NEXT: movb $85, %al
|
||||
; SKX-NEXT: kmovd %eax, %k1
|
||||
; SKX-NEXT: kandb %k1, %k0, %k0
|
||||
; SKX-NEXT: vpmovm2w %k0, %xmm0
|
||||
; SKX-NEXT: vpsraw $15, %xmm0, %xmm0
|
||||
; SKX-NEXT: popq %rax
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
@ -264,14 +256,9 @@ define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
|
||||
; KNL_X32-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; KNL_X32-NEXT: ## kill: def %xmm0 killed %xmm0 killed %ymm0
|
||||
; KNL_X32-NEXT: calll _func8xi1
|
||||
; KNL_X32-NEXT: vpmovsxwq %xmm0, %zmm0
|
||||
; KNL_X32-NEXT: vpsllq $63, %zmm0, %zmm0
|
||||
; KNL_X32-NEXT: movb $85, %al
|
||||
; KNL_X32-NEXT: kmovw %eax, %k1
|
||||
; KNL_X32-NEXT: vptestmq %zmm0, %zmm0, %k1 {%k1}
|
||||
; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; KNL_X32-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; KNL_X32-NEXT: ## kill: def %xmm0 killed %xmm0 killed %ymm0
|
||||
; KNL_X32-NEXT: vpand LCPI7_0, %xmm0, %xmm0
|
||||
; KNL_X32-NEXT: vpsllw $15, %xmm0, %xmm0
|
||||
; KNL_X32-NEXT: vpsraw $15, %xmm0, %xmm0
|
||||
; KNL_X32-NEXT: addl $12, %esp
|
||||
; KNL_X32-NEXT: retl
|
||||
%cmpRes = icmp sgt <8 x i32>%a, %b
|
||||
|
@ -1691,8 +1691,8 @@ define <2 x double> @sbto2f64(<2 x double> %a) {
|
||||
; VLDQ: # %bb.0:
|
||||
; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
||||
; VLDQ-NEXT: vcmpltpd %xmm0, %xmm1, %k0
|
||||
; VLDQ-NEXT: vpmovm2q %k0, %xmm0
|
||||
; VLDQ-NEXT: vcvtqq2pd %xmm0, %xmm0
|
||||
; VLDQ-NEXT: vpmovm2d %k0, %xmm0
|
||||
; VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
|
||||
; VLDQ-NEXT: retq
|
||||
;
|
||||
; VLNODQ-LABEL: sbto2f64:
|
||||
@ -1700,12 +1700,8 @@ define <2 x double> @sbto2f64(<2 x double> %a) {
|
||||
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; VLNODQ-NEXT: vcmpltpd %xmm0, %xmm1, %k1
|
||||
; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; VLNODQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; VLNODQ-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm1
|
||||
; VLNODQ-NEXT: vmovq %xmm0, %rax
|
||||
; VLNODQ-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm0
|
||||
; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; VLNODQ-NEXT: vcvtdq2pd %xmm0, %xmm0
|
||||
; VLNODQ-NEXT: retq
|
||||
%cmpres = fcmp ogt <2 x double> %a, zeroinitializer
|
||||
%1 = sitofp <2 x i1> %cmpres to <2 x double>
|
||||
@ -2002,30 +1998,22 @@ define <2 x double> @ubto2f64(<2 x i32> %a) {
|
||||
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
|
||||
; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
||||
; NOVL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; NOVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; NOVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
|
||||
; NOVL-NEXT: vpand %xmm1, %xmm0, %xmm0
|
||||
; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0
|
||||
; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
|
||||
; NOVL-NEXT: vzeroupper
|
||||
; NOVL-NEXT: retq
|
||||
;
|
||||
; VLDQ-LABEL: ubto2f64:
|
||||
; VLDQ: # %bb.0:
|
||||
; VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; VLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
|
||||
; VLDQ-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
|
||||
; VLDQ-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
|
||||
; VLDQ-NEXT: vcvtqq2pd %xmm0, %xmm0
|
||||
; VLDQ-NEXT: retq
|
||||
;
|
||||
; VLNODQ-LABEL: ubto2f64:
|
||||
; VLNODQ: # %bb.0:
|
||||
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; VLNODQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
|
||||
; VLNODQ-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
|
||||
; VLNODQ-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
|
||||
; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; VLNODQ-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm1
|
||||
; VLNODQ-NEXT: vmovq %xmm0, %rax
|
||||
; VLNODQ-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm0
|
||||
; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; VLNODQ-NEXT: retq
|
||||
; VL-LABEL: ubto2f64:
|
||||
; VL: # %bb.0:
|
||||
; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
|
||||
; VL-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
|
||||
; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
|
||||
; VL-NEXT: vcvtudq2pd %xmm0, %xmm0
|
||||
; VL-NEXT: retq
|
||||
%mask = icmp ult <2 x i32> %a, zeroinitializer
|
||||
%1 = uitofp <2 x i1> %mask to <2 x double>
|
||||
ret <2 x double> %1
|
||||
|
@ -2602,16 +2602,16 @@ define <2 x double> @sbto2f64(<2 x double> %a) {
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
|
||||
; GENERIC-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00]
|
||||
; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: sbto2f64:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
|
||||
; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00]
|
||||
; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
|
||||
; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.33]
|
||||
; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
|
||||
; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
|
||||
; SKX-NEXT: retq # sched: [7:1.00]
|
||||
%cmpres = fcmp ogt <2 x double> %a, zeroinitializer
|
||||
%1 = sitofp <2 x i1> %cmpres to <2 x double>
|
||||
@ -2989,8 +2989,8 @@ define <2 x double> @ubto2f64(<2 x i32> %a) {
|
||||
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
|
||||
; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
|
||||
; GENERIC-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [4:0.50]
|
||||
; GENERIC-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00]
|
||||
; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
|
||||
; GENERIC-NEXT: vcvtudq2pd %xmm0, %xmm0 # sched: [4:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: ubto2f64:
|
||||
@ -2998,8 +2998,8 @@ define <2 x double> @ubto2f64(<2 x i32> %a) {
|
||||
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
|
||||
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
|
||||
; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 # sched: [3:1.00]
|
||||
; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
|
||||
; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.33]
|
||||
; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
|
||||
; SKX-NEXT: vcvtudq2pd %xmm0, %xmm0 # sched: [5:1.00]
|
||||
; SKX-NEXT: retq # sched: [7:1.00]
|
||||
%mask = icmp ult <2 x i32> %a, zeroinitializer
|
||||
%1 = uitofp <2 x i1> %mask to <2 x double>
|
||||
|
136
test/CodeGen/X86/pr34080-2.ll
Normal file
136
test/CodeGen/X86/pr34080-2.ll
Normal file
@ -0,0 +1,136 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i386-unknown-openbsd6.2 | FileCheck %s
|
||||
|
||||
%struct.DateTime = type { i64, i32, i32, i32, i32, i32, double, i8 }
|
||||
|
||||
define void @computeJD(%struct.DateTime*) nounwind {
|
||||
; CHECK-LABEL: computeJD:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pushl %ebp
|
||||
; CHECK-NEXT: movl %esp, %ebp
|
||||
; CHECK-NEXT: pushl %ebx
|
||||
; CHECK-NEXT: pushl %edi
|
||||
; CHECK-NEXT: pushl %esi
|
||||
; CHECK-NEXT: andl $-8, %esp
|
||||
; CHECK-NEXT: subl $32, %esp
|
||||
; CHECK-NEXT: movl 8(%ebp), %ebx
|
||||
; CHECK-NEXT: movl 8(%ebx), %esi
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: cmpl $3, 12(%ebx)
|
||||
; CHECK-NEXT: setl %al
|
||||
; CHECK-NEXT: subl %eax, %esi
|
||||
; CHECK-NEXT: movl $-1374389535, %ecx # imm = 0xAE147AE1
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: imull %ecx
|
||||
; CHECK-NEXT: movl %edx, %ecx
|
||||
; CHECK-NEXT: movl %ecx, %eax
|
||||
; CHECK-NEXT: shrl $31, %eax
|
||||
; CHECK-NEXT: sarl $5, %ecx
|
||||
; CHECK-NEXT: addl %eax, %ecx
|
||||
; CHECK-NEXT: movl $1374389535, %edx # imm = 0x51EB851F
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: imull %edx
|
||||
; CHECK-NEXT: movl %edx, %edi
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: shrl $31, %eax
|
||||
; CHECK-NEXT: sarl $7, %edi
|
||||
; CHECK-NEXT: addl %eax, %edi
|
||||
; CHECK-NEXT: imull $36525, %esi, %eax # imm = 0x8EAD
|
||||
; CHECK-NEXT: addl $172251900, %eax # imm = 0xA445AFC
|
||||
; CHECK-NEXT: movl $1374389535, %edx # imm = 0x51EB851F
|
||||
; CHECK-NEXT: imull %edx
|
||||
; CHECK-NEXT: movl %edx, %eax
|
||||
; CHECK-NEXT: shrl $31, %eax
|
||||
; CHECK-NEXT: sarl $5, %edx
|
||||
; CHECK-NEXT: addl %eax, %edx
|
||||
; CHECK-NEXT: addl 16(%ebx), %ecx
|
||||
; CHECK-NEXT: addl %edi, %ecx
|
||||
; CHECK-NEXT: leal 257(%ecx,%edx), %eax
|
||||
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: fildl {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: fadds {{\.LCPI.*}}
|
||||
; CHECK-NEXT: fmuls {{\.LCPI.*}}
|
||||
; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F
|
||||
; CHECK-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: fistpll {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: fldcw {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: movb $1, 36(%ebx)
|
||||
; CHECK-NEXT: imull $3600000, 20(%ebx), %eax # imm = 0x36EE80
|
||||
; CHECK-NEXT: imull $60000, 24(%ebx), %ecx # imm = 0xEA60
|
||||
; CHECK-NEXT: addl %eax, %ecx
|
||||
; CHECK-NEXT: fldl 28(%ebx)
|
||||
; CHECK-NEXT: fmuls {{\.LCPI.*}}
|
||||
; CHECK-NEXT: fnstcw (%esp)
|
||||
; CHECK-NEXT: movzwl (%esp), %eax
|
||||
; CHECK-NEXT: movw $3199, (%esp) # imm = 0xC7F
|
||||
; CHECK-NEXT: fldcw (%esp)
|
||||
; CHECK-NEXT: movw %ax, (%esp)
|
||||
; CHECK-NEXT: movl %ecx, %eax
|
||||
; CHECK-NEXT: sarl $31, %eax
|
||||
; CHECK-NEXT: fistpll {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: fldcw (%esp)
|
||||
; CHECK-NEXT: addl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: adcl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: addl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: adcl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl %ecx, (%ebx)
|
||||
; CHECK-NEXT: movl %eax, 4(%ebx)
|
||||
; CHECK-NEXT: leal -12(%ebp), %esp
|
||||
; CHECK-NEXT: popl %esi
|
||||
; CHECK-NEXT: popl %edi
|
||||
; CHECK-NEXT: popl %ebx
|
||||
; CHECK-NEXT: popl %ebp
|
||||
; CHECK-NEXT: retl
|
||||
%2 = getelementptr inbounds %struct.DateTime, %struct.DateTime* %0, i32 0, i32 7
|
||||
%3 = getelementptr inbounds %struct.DateTime, %struct.DateTime* %0, i32 0, i32 1
|
||||
%4 = load i32, i32* %3, align 4
|
||||
%5 = getelementptr inbounds %struct.DateTime, %struct.DateTime* %0, i32 0, i32 2
|
||||
%6 = load i32, i32* %5, align 4
|
||||
%7 = getelementptr inbounds %struct.DateTime, %struct.DateTime* %0, i32 0, i32 3
|
||||
%8 = load i32, i32* %7, align 4
|
||||
%9 = icmp slt i32 %6, 3
|
||||
%10 = add i32 %6, 12
|
||||
%11 = select i1 %9, i32 %10, i32 %6
|
||||
%12 = sext i1 %9 to i32
|
||||
%13 = add i32 %4, %12
|
||||
%14 = sdiv i32 %13, -100
|
||||
%15 = sdiv i32 %13, 400
|
||||
%16 = mul i32 %13, 36525
|
||||
%17 = add i32 %16, 172251900
|
||||
%18 = sdiv i32 %17, 100
|
||||
%19 = mul i32 %11, 306001
|
||||
%20 = add i32 %19, 306001
|
||||
%21 = sdiv i32 %20, 10000
|
||||
%22 = add i32 %8, 2
|
||||
%23 = add i32 %22, %14
|
||||
%24 = add i32 %23, %15
|
||||
%25 = add i32 %24, 255
|
||||
%26 = add i32 %25, %18
|
||||
%27 = sitofp i32 %26 to double
|
||||
%28 = fadd double %27, -1.524500e+03
|
||||
%29 = fmul double %28, 8.640000e+07
|
||||
%30 = fptosi double %29 to i64
|
||||
%31 = getelementptr inbounds %struct.DateTime, %struct.DateTime* %0, i32 0, i32 0
|
||||
store i8 1, i8* %2, align 4
|
||||
%32 = getelementptr inbounds %struct.DateTime, %struct.DateTime* %0, i32 0, i32 4
|
||||
%33 = load i32, i32* %32, align 4
|
||||
%34 = mul i32 %33, 3600000
|
||||
%35 = getelementptr inbounds %struct.DateTime, %struct.DateTime* %0, i32 0, i32 5
|
||||
%36 = load i32, i32* %35, align 4
|
||||
%37 = mul i32 %36, 60000
|
||||
%38 = add i32 %37, %34
|
||||
%39 = sext i32 %38 to i64
|
||||
%40 = getelementptr inbounds %struct.DateTime, %struct.DateTime* %0, i32 0, i32 6
|
||||
%41 = load double, double* %40, align 4
|
||||
%42 = fmul double %41, 1.000000e+03
|
||||
%43 = fptosi double %42 to i64
|
||||
%44 = add i64 %39, %43
|
||||
%45 = add i64 %44, %30
|
||||
store i64 %45, i64* %31, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="i486" "target-features"="+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 -mcpu=x86-64 | FileCheck %s --check-prefix=SSE2-BROKEN
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 -mcpu=x86-64 | FileCheck %s --check-prefix=SSE2-SCHEDULE
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse3 | FileCheck %s --check-prefix=SSE3
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse3 -mcpu=prescott | FileCheck %s --check-prefix=SSE3
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=AVX
|
||||
@ -46,44 +46,44 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 {
|
||||
; SSE2-NEXT: popq %rbp
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE2-BROKEN-LABEL: _Z1fe:
|
||||
; SSE2-BROKEN: ## %bb.0: ## %entry
|
||||
; SSE2-BROKEN-NEXT: pushq %rbp
|
||||
; SSE2-BROKEN-NEXT: .cfi_def_cfa_offset 16
|
||||
; SSE2-BROKEN-NEXT: .cfi_offset %rbp, -16
|
||||
; SSE2-BROKEN-NEXT: movq %rsp, %rbp
|
||||
; SSE2-BROKEN-NEXT: .cfi_def_cfa_register %rbp
|
||||
; SSE2-BROKEN-NEXT: fnstcw -4(%rbp)
|
||||
; SSE2-BROKEN-NEXT: fldt 16(%rbp)
|
||||
; SSE2-BROKEN-NEXT: movzwl -4(%rbp), %eax
|
||||
; SSE2-BROKEN-NEXT: movw $3199, -4(%rbp) ## imm = 0xC7F
|
||||
; SSE2-BROKEN-NEXT: fldcw -4(%rbp)
|
||||
; SSE2-BROKEN-NEXT: movw %ax, -4(%rbp)
|
||||
; SSE2-BROKEN-NEXT: fistl -8(%rbp)
|
||||
; SSE2-BROKEN-NEXT: fldcw -4(%rbp)
|
||||
; SSE2-BROKEN-NEXT: cvtsi2sdl -8(%rbp), %xmm0
|
||||
; SSE2-BROKEN-NEXT: movsd %xmm0, -64(%rbp)
|
||||
; SSE2-BROKEN-NEXT: movsd %xmm0, -32(%rbp)
|
||||
; SSE2-BROKEN-NEXT: fsubl -32(%rbp)
|
||||
; SSE2-BROKEN-NEXT: flds {{.*}}(%rip)
|
||||
; SSE2-BROKEN-NEXT: fnstcw -2(%rbp)
|
||||
; SSE2-BROKEN-NEXT: fmul %st(0), %st(1)
|
||||
; SSE2-BROKEN-NEXT: movzwl -2(%rbp), %eax
|
||||
; SSE2-BROKEN-NEXT: movw $3199, -2(%rbp) ## imm = 0xC7F
|
||||
; SSE2-BROKEN-NEXT: fldcw -2(%rbp)
|
||||
; SSE2-BROKEN-NEXT: movw %ax, -2(%rbp)
|
||||
; SSE2-BROKEN-NEXT: fxch %st(1)
|
||||
; SSE2-BROKEN-NEXT: fistl -12(%rbp)
|
||||
; SSE2-BROKEN-NEXT: fldcw -2(%rbp)
|
||||
; SSE2-BROKEN-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE2-BROKEN-NEXT: cvtsi2sdl -12(%rbp), %xmm0
|
||||
; SSE2-BROKEN-NEXT: movsd %xmm0, -56(%rbp)
|
||||
; SSE2-BROKEN-NEXT: movsd %xmm0, -24(%rbp)
|
||||
; SSE2-BROKEN-NEXT: fsubl -24(%rbp)
|
||||
; SSE2-BROKEN-NEXT: fmulp %st(1)
|
||||
; SSE2-BROKEN-NEXT: fstpl -48(%rbp)
|
||||
; SSE2-BROKEN-NEXT: popq %rbp
|
||||
; SSE2-BROKEN-NEXT: retq
|
||||
; SSE2-SCHEDULE-LABEL: _Z1fe:
|
||||
; SSE2-SCHEDULE: ## %bb.0: ## %entry
|
||||
; SSE2-SCHEDULE-NEXT: pushq %rbp
|
||||
; SSE2-SCHEDULE-NEXT: .cfi_def_cfa_offset 16
|
||||
; SSE2-SCHEDULE-NEXT: .cfi_offset %rbp, -16
|
||||
; SSE2-SCHEDULE-NEXT: movq %rsp, %rbp
|
||||
; SSE2-SCHEDULE-NEXT: .cfi_def_cfa_register %rbp
|
||||
; SSE2-SCHEDULE-NEXT: fnstcw -4(%rbp)
|
||||
; SSE2-SCHEDULE-NEXT: fldt 16(%rbp)
|
||||
; SSE2-SCHEDULE-NEXT: movzwl -4(%rbp), %eax
|
||||
; SSE2-SCHEDULE-NEXT: movw $3199, -4(%rbp) ## imm = 0xC7F
|
||||
; SSE2-SCHEDULE-NEXT: fldcw -4(%rbp)
|
||||
; SSE2-SCHEDULE-NEXT: movw %ax, -4(%rbp)
|
||||
; SSE2-SCHEDULE-NEXT: fistl -8(%rbp)
|
||||
; SSE2-SCHEDULE-NEXT: fldcw -4(%rbp)
|
||||
; SSE2-SCHEDULE-NEXT: cvtsi2sdl -8(%rbp), %xmm0
|
||||
; SSE2-SCHEDULE-NEXT: movsd %xmm0, -64(%rbp)
|
||||
; SSE2-SCHEDULE-NEXT: movsd %xmm0, -32(%rbp)
|
||||
; SSE2-SCHEDULE-NEXT: fsubl -32(%rbp)
|
||||
; SSE2-SCHEDULE-NEXT: flds {{.*}}(%rip)
|
||||
; SSE2-SCHEDULE-NEXT: fnstcw -2(%rbp)
|
||||
; SSE2-SCHEDULE-NEXT: fmul %st(0), %st(1)
|
||||
; SSE2-SCHEDULE-NEXT: movzwl -2(%rbp), %eax
|
||||
; SSE2-SCHEDULE-NEXT: movw $3199, -2(%rbp) ## imm = 0xC7F
|
||||
; SSE2-SCHEDULE-NEXT: fldcw -2(%rbp)
|
||||
; SSE2-SCHEDULE-NEXT: movw %ax, -2(%rbp)
|
||||
; SSE2-SCHEDULE-NEXT: fxch %st(1)
|
||||
; SSE2-SCHEDULE-NEXT: fistl -12(%rbp)
|
||||
; SSE2-SCHEDULE-NEXT: fldcw -2(%rbp)
|
||||
; SSE2-SCHEDULE-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE2-SCHEDULE-NEXT: cvtsi2sdl -12(%rbp), %xmm0
|
||||
; SSE2-SCHEDULE-NEXT: movsd %xmm0, -56(%rbp)
|
||||
; SSE2-SCHEDULE-NEXT: movsd %xmm0, -24(%rbp)
|
||||
; SSE2-SCHEDULE-NEXT: fsubl -24(%rbp)
|
||||
; SSE2-SCHEDULE-NEXT: fmulp %st(1)
|
||||
; SSE2-SCHEDULE-NEXT: fstpl -48(%rbp)
|
||||
; SSE2-SCHEDULE-NEXT: popq %rbp
|
||||
; SSE2-SCHEDULE-NEXT: retq
|
||||
;
|
||||
; SSE3-LABEL: _Z1fe:
|
||||
; SSE3: ## %bb.0: ## %entry
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,8 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32-AVX2
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64-AVX2
|
||||
|
||||
define void @and_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
|
||||
; X32-LABEL: and_masks:
|
||||
@ -31,6 +33,37 @@ define void @and_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwi
|
||||
; X64-NEXT: vmovaps %ymm0, (%rax)
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X32-AVX2-LABEL: and_masks:
|
||||
; X32-AVX2: ## %bb.0:
|
||||
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X32-AVX2-NEXT: vmovups (%edx), %ymm0
|
||||
; X32-AVX2-NEXT: vmovups (%ecx), %ymm1
|
||||
; X32-AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm1
|
||||
; X32-AVX2-NEXT: vmovups (%eax), %ymm2
|
||||
; X32-AVX2-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
|
||||
; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
|
||||
; X32-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1
|
||||
; X32-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
|
||||
; X32-AVX2-NEXT: vmovaps %ymm0, (%eax)
|
||||
; X32-AVX2-NEXT: vzeroupper
|
||||
; X32-AVX2-NEXT: retl
|
||||
;
|
||||
; X64-AVX2-LABEL: and_masks:
|
||||
; X64-AVX2: ## %bb.0:
|
||||
; X64-AVX2-NEXT: vmovups (%rdi), %ymm0
|
||||
; X64-AVX2-NEXT: vmovups (%rsi), %ymm1
|
||||
; X64-AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm1
|
||||
; X64-AVX2-NEXT: vmovups (%rdx), %ymm2
|
||||
; X64-AVX2-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
|
||||
; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
|
||||
; X64-AVX2-NEXT: vandps %ymm2, %ymm1, %ymm1
|
||||
; X64-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
|
||||
; X64-AVX2-NEXT: vmovaps %ymm0, (%rax)
|
||||
; X64-AVX2-NEXT: vzeroupper
|
||||
; X64-AVX2-NEXT: retq
|
||||
%v0 = load <8 x float>, <8 x float>* %a, align 16
|
||||
%v1 = load <8 x float>, <8 x float>* %b, align 16
|
||||
%m0 = fcmp olt <8 x float> %v1, %v0
|
||||
@ -62,6 +95,28 @@ define void @neg_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwi
|
||||
; X64-NEXT: vmovaps %ymm0, (%rax)
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X32-AVX2-LABEL: neg_masks:
|
||||
; X32-AVX2: ## %bb.0:
|
||||
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-AVX2-NEXT: vmovups (%ecx), %ymm0
|
||||
; X32-AVX2-NEXT: vcmpnltps (%eax), %ymm0, %ymm0
|
||||
; X32-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
|
||||
; X32-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
|
||||
; X32-AVX2-NEXT: vmovaps %ymm0, (%eax)
|
||||
; X32-AVX2-NEXT: vzeroupper
|
||||
; X32-AVX2-NEXT: retl
|
||||
;
|
||||
; X64-AVX2-LABEL: neg_masks:
|
||||
; X64-AVX2: ## %bb.0:
|
||||
; X64-AVX2-NEXT: vmovups (%rsi), %ymm0
|
||||
; X64-AVX2-NEXT: vcmpnltps (%rdi), %ymm0, %ymm0
|
||||
; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
|
||||
; X64-AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
|
||||
; X64-AVX2-NEXT: vmovaps %ymm0, (%rax)
|
||||
; X64-AVX2-NEXT: vzeroupper
|
||||
; X64-AVX2-NEXT: retq
|
||||
%v0 = load <8 x float>, <8 x float>* %a, align 16
|
||||
%v1 = load <8 x float>, <8 x float>* %b, align 16
|
||||
%m0 = fcmp olt <8 x float> %v1, %v0
|
||||
@ -71,3 +126,50 @@ define void @neg_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwi
|
||||
ret void
|
||||
}
|
||||
|
||||
define <8 x i32> @and_mask_constant(<8 x i32> %v0, <8 x i32> %v1) {
|
||||
; X32-LABEL: and_mask_constant:
|
||||
; X32: ## %bb.0:
|
||||
; X32-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; X32-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; X32-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
|
||||
; X32-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
|
||||
; X32-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
||||
; X32-NEXT: vpand LCPI2_0, %xmm0, %xmm0
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X32-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: and_mask_constant:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; X64-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
|
||||
; X64-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
|
||||
; X64-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
||||
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; X64-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||
; X64-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X32-AVX2-LABEL: and_mask_constant:
|
||||
; X32-AVX2: ## %bb.0:
|
||||
; X32-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; X32-AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
|
||||
; X32-AVX2-NEXT: vpand LCPI2_0, %ymm0, %ymm0
|
||||
; X32-AVX2-NEXT: retl
|
||||
;
|
||||
; X64-AVX2-LABEL: and_mask_constant:
|
||||
; X64-AVX2: ## %bb.0:
|
||||
; X64-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; X64-AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
|
||||
; X64-AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; X64-AVX2-NEXT: retq
|
||||
%m = icmp eq <8 x i32> %v0, zeroinitializer
|
||||
%mand = and <8 x i1> %m, <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>
|
||||
%r = zext <8 x i1> %mand to <8 x i32>
|
||||
ret <8 x i32> %r
|
||||
}
|
||||
|
28
test/MC/ELF/comdat-name-number.s
Normal file
28
test/MC/ELF/comdat-name-number.s
Normal file
@ -0,0 +1,28 @@
|
||||
// RUN: llvm-mc -triple x86_64-pc-linux-gnu %s -filetype=obj -o %t.o
|
||||
// RUN: llvm-readobj -elf-section-groups %t.o | FileCheck %s
|
||||
|
||||
// Test that we can handle numeric COMDAT names.
|
||||
|
||||
.section .foo,"G",@progbits,123,comdat
|
||||
.section .bar,"G",@progbits,abc,comdat
|
||||
|
||||
// CHECK: Groups {
|
||||
// CHECK-NEXT: Group {
|
||||
// CHECK-NEXT: Name: .group
|
||||
// CHECK-NEXT: Index:
|
||||
// CHECK-NEXT: Type: COMDAT
|
||||
// CHECK-NEXT: Signature: 123
|
||||
// CHECK-NEXT: Section(s) in group [
|
||||
// CHECK-NEXT: .foo
|
||||
// CHECK-NEXT: ]
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: Group {
|
||||
// CHECK-NEXT: Name: .group
|
||||
// CHECK-NEXT: Index:
|
||||
// CHECK-NEXT: Type: COMDAT
|
||||
// CHECK-NEXT: Signature: abc
|
||||
// CHECK-NEXT: Section(s) in group [
|
||||
// CHECK-NEXT: .bar
|
||||
// CHECK-NEXT: ]
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
Loading…
Reference in New Issue
Block a user