Vendor import of llvm release_60 branch r325330:
https://llvm.org/svn/llvm-project/llvm/branches/release_60@325330

parent 6d18171c19
commit 3c315f3a8e
@@ -71,6 +71,13 @@ Non-comprehensive list of changes in this release

Changes to the LLVM IR
----------------------

Changes to the AArch64 Target
-----------------------------

During this release:

* Enabled the new GlobalISel instruction selection framework by default at ``-O0``.

Changes to the ARM Target
-------------------------

@@ -80,6 +87,28 @@ During this release the ARM target has:

  isn't the default.

Changes to the Hexagon Target
-----------------------------

* The Hexagon backend now supports V65 ISA.

* The ``-mhvx`` option now takes an optional value that specifies the ISA
  version of the HVX coprocessor. The available values are v60, v62 and v65.
  By default, the value is set to be the same as the CPU version.

* The compiler option ``-mhvx-double`` is deprecated and will be removed in
  the next release of the compiler. Programmers should use the ``-mhvx-length``
  option to specify the desired vector length: ``-mhvx-length=64b`` for
  64-byte vectors and ``-mhvx-length=128b`` for 128-byte vectors. While the
  current default vector length is 64 bytes, users should always specify the
  length explicitly, since the default value may change in the future.

* The target feature ``hvx-double`` is deprecated and will be removed in the
  next release. LLVM IR generators should use the target features ``hvx-length64b``
  and ``hvx-length128b`` to indicate the vector length. The length should
  always be specified when HVX code generation is enabled.
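[Editor's sketch, not part of the imported notes: how an IR generator might follow the deprecation advice above in C++. The feature strings come from the note itself; the function name, the "+hvx" feature spelling, and the CPU name are illustrative assumptions.]

  #include "llvm/IR/Function.h"

  // Pin the HVX vector length explicitly via the new hvx-length128b
  // feature instead of the deprecated hvx-double, per the note above.
  void tagHexagonHvxFunction(llvm::Function &F) {
    // Relying on the default length is discouraged; it may change.
    F.addFnAttr("target-features", "+hvx,+hvx-length128b");
    F.addFnAttr("target-cpu", "hexagonv65"); // V65 ISA, newly supported
  }
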
Changes to the MIPS Target
--------------------------

@@ -91,6 +120,15 @@ Changes to the PowerPC Target

During this release ...

Changes to the SystemZ Target
-----------------------------

During this release the SystemZ target has:

* Added support for 128-bit atomic operations.

* Added support for the "o" constraint for inline asm statements.

Changes to the X86 Target
-------------------------

@@ -238,6 +238,26 @@ def int_amdgcn_cvt_pkrtz : Intrinsic<
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cvt_pknorm_i16 : Intrinsic<
  [llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cvt_pknorm_u16 : Intrinsic<
  [llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cvt_pk_i16 : Intrinsic<
  [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cvt_pk_u16 : Intrinsic<
  [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_class : Intrinsic<
  [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
@@ -3738,6 +3738,15 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
  def int_x86_avx512_kxnor_w : // TODO: remove this intrinsic
      Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
                [IntrNoMem]>;
  def int_x86_avx512_kunpck_bw : GCCBuiltin<"__builtin_ia32_kunpckhi">,
      Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
                [IntrNoMem]>;
  def int_x86_avx512_kunpck_wd : GCCBuiltin<"__builtin_ia32_kunpcksi">,
      Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
                [IntrNoMem]>;
  def int_x86_avx512_kunpck_dq : GCCBuiltin<"__builtin_ia32_kunpckdi">,
      Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
                [IntrNoMem]>;
  def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">,
      Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
                [IntrNoMem]>;
include/llvm/MC/MCAsmMacro.h (new file, 38 lines)
@@ -0,0 +1,38 @@
//===- MCAsmMacro.h - Assembly Macros ---------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_MC_MCASMMACRO_H
#define LLVM_MC_MCASMMACRO_H

#include "llvm/MC/MCParser/MCAsmLexer.h"

namespace llvm {

struct MCAsmMacroParameter {
  StringRef Name;
  std::vector<AsmToken> Value;
  bool Required = false;
  bool Vararg = false;

  MCAsmMacroParameter() = default;
};

typedef std::vector<MCAsmMacroParameter> MCAsmMacroParameters;
struct MCAsmMacro {
  StringRef Name;
  StringRef Body;
  MCAsmMacroParameters Parameters;

public:
  MCAsmMacro(StringRef N, StringRef B, MCAsmMacroParameters P)
      : Name(N), Body(B), Parameters(std::move(P)) {}
};
}; // namespace llvm

#endif
@@ -18,6 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/MC/MCAsmMacro.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SectionKind.h"
@@ -268,6 +269,9 @@ namespace llvm {
                                       unsigned UniqueID,
                                       const MCSymbolELF *Associated);

    /// \brief Map of currently defined macros.
    StringMap<MCAsmMacro> MacroMap;

  public:
    explicit MCContext(const MCAsmInfo *MAI, const MCRegisterInfo *MRI,
                       const MCObjectFileInfo *MOFI,
@@ -618,6 +622,17 @@ namespace llvm {
    // FIXME: We should really do something about that.
    LLVM_ATTRIBUTE_NORETURN void reportFatalError(SMLoc L,
                                                  const Twine &Msg);

    const MCAsmMacro *lookupMacro(StringRef Name) {
      StringMap<MCAsmMacro>::iterator I = MacroMap.find(Name);
      return (I == MacroMap.end()) ? nullptr : &I->getValue();
    }

    void defineMacro(StringRef Name, MCAsmMacro Macro) {
      MacroMap.insert(std::make_pair(Name, std::move(Macro)));
    }

    void undefineMacro(StringRef Name) { MacroMap.erase(Name); }
  };

} // end namespace llvm

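[Editor's sketch: this hunk moves the assembler macro table from AsmParser onto MCContext (see the AsmParser hunks below). A minimal usage example of the new interface, not taken from the patch — the macro name and body are invented for illustration:]

  #include "llvm/MC/MCAsmMacro.h"
  #include "llvm/MC/MCContext.h"

  void demoMacroTable(llvm::MCContext &Ctx) {
    llvm::MCAsmMacroParameters Params;  // a macro with no formal parameters
    Ctx.defineMacro("bump", llvm::MCAsmMacro("bump", "addl $1, %eax\n",
                                             std::move(Params)));

    if (const llvm::MCAsmMacro *M = Ctx.lookupMacro("bump")) {
      // A parser would expand M->Body here; M->Parameters is empty.
      (void)M;
    }

    Ctx.undefineMacro("bump");  // a no-op if "bump" were not defined
  }
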
@@ -698,24 +698,20 @@ struct SemiNCAInfo {
      return;

    // Recalculate the set of roots.
    DT.Roots = FindRoots(DT, BUI);
    for (const NodePtr R : DT.Roots) {
      const TreeNodePtr TN = DT.getNode(R);
      // A CFG node was selected as a tree root, but the corresponding tree node
      // is not connected to the virtual root. This is because the incremental
      // algorithm does not really know or use the set of roots and can make a
      // different (implicit) decision about which nodes within an infinite loop
      // becomes a root.
      if (TN && !DT.isVirtualRoot(TN->getIDom())) {
        DEBUG(dbgs() << "Root " << BlockNamePrinter(R)
                     << " is not virtual root's child\n"
                     << "The entire tree needs to be rebuilt\n");
        // It should be possible to rotate the subtree instead of recalculating
        // the whole tree, but this situation happens extremely rarely in
        // practice.
        CalculateFromScratch(DT, BUI);
        return;
      }
    auto Roots = FindRoots(DT, BUI);
    if (DT.Roots.size() != Roots.size() ||
        !std::is_permutation(DT.Roots.begin(), DT.Roots.end(), Roots.begin())) {
      // The roots chosen in the CFG have changed. This is because the
      // incremental algorithm does not really know or use the set of roots and
      // can make a different (implicit) decision about which node within an
      // infinite loop becomes a root.

      DEBUG(dbgs() << "Roots are different in updated trees\n"
                   << "The entire tree needs to be rebuilt\n");
      // It may be possible to update the tree without recalculating it, but
      // we do not know yet how to do it, and it happens rarely in practice.
      CalculateFromScratch(DT, BUI);
      return;
    }
  }

@@ -163,7 +163,8 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {

  DIType *BaseType = DDTy->getBaseType().resolve();

  assert(BaseType && "Unexpected invalid base type");
  if (!BaseType)
    return 0;

  // If this is a derived type, go ahead and get the base type, unless it's a
  // reference then it's just the size of the field. Pointer types have no need
@@ -1391,7 +1391,8 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
  if (!Name.empty())
    addString(MemberDie, dwarf::DW_AT_name, Name);

  addType(MemberDie, resolve(DT->getBaseType()));
  if (DIType *Resolved = resolve(DT->getBaseType()))
    addType(MemberDie, Resolved);

  addSourceLine(MemberDie, DT);

@@ -205,14 +205,18 @@ void LivePhysRegs::addPristines(const MachineFunction &MF) {
}

void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) {
  if (!MBB.succ_empty()) {
    // To get the live-outs we simply merge the live-ins of all successors.
    for (const MachineBasicBlock *Succ : MBB.successors())
      addBlockLiveIns(*Succ);
  } else if (MBB.isReturnBlock()) {
    // For the return block: Add all callee saved registers that are saved and
    // restored (somewhere); This does not include callee saved registers that
    // are unused and hence not saved and restored; they are called pristine.
  // To get the live-outs we simply merge the live-ins of all successors.
  for (const MachineBasicBlock *Succ : MBB.successors())
    addBlockLiveIns(*Succ);
  if (MBB.isReturnBlock()) {
    // Return blocks are a special case because we currently don't mark up
    // return instructions completely: specifically, there is no explicit
    // use for callee-saved registers. So we add all callee saved registers
    // that are saved and restored (somewhere). This does not include
    // callee saved registers that are unused and hence not saved and
    // restored; they are called pristine.
    // FIXME: PEI should add explicit markings to return instructions
    // instead of implicitly handling them here.
    const MachineFunction &MF = *MBB.getParent();
    const MachineFrameInfo &MFI = MF.getFrameInfo();
    if (MFI.isCalleeSavedInfoValid()) {
@@ -225,15 +229,8 @@ void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) {

void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
  const MachineFunction &MF = *MBB.getParent();
  if (!MBB.succ_empty()) {
    addPristines(MF);
    addLiveOutsNoPristines(MBB);
  } else if (MBB.isReturnBlock()) {
    // For the return block: Add all callee saved registers.
    const MachineFrameInfo &MFI = MF.getFrameInfo();
    if (MFI.isCalleeSavedInfoValid())
      addCalleeSavedRegs(*this, MF);
  }
  addPristines(MF);
  addLiveOutsNoPristines(MBB);
}

void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) {

@@ -16409,7 +16409,9 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
      N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(0).getOperand(1) == N2 &&
      N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
          VT.getVectorNumElements()) {
          VT.getVectorNumElements() &&
      N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
          VT.getSizeInBits()) {
    return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
  }

@@ -491,9 +491,8 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx,
  return VNI;
}

void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) {
  assert(ParentVNI && "Mapping  NULL value");
  ValueForcePair &VFP = Values[std::make_pair(RegIdx, ParentVNI->id)];
void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo &ParentVNI) {
  ValueForcePair &VFP = Values[std::make_pair(RegIdx, ParentVNI.id)];
  VNInfo *VNI = VFP.getPointer();

  // ParentVNI was either unmapped or already complex mapped. Either way, just
@@ -777,7 +776,7 @@ SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) {
  // the source live range. The spiller also won't try to hoist this copy.
  if (SpillMode && !SlotIndex::isSameInstr(ParentVNI->def, Idx) &&
      MI->readsVirtualRegister(Edit->getReg())) {
    forceRecompute(0, ParentVNI);
    forceRecompute(0, *ParentVNI);
    defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI);
    return Idx;
  }
@@ -835,7 +834,7 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {

  // The complement interval will be extended as needed by LRCalc.extend().
  if (ParentVNI)
    forceRecompute(0, ParentVNI);
    forceRecompute(0, *ParentVNI);
  DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):");
  RegAssign.insert(Start, End, OpenIdx);
  DEBUG(dump());
@@ -878,7 +877,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
    unsigned RegIdx = AssignI.value();
    if (AtBegin || !MBBI->readsVirtualRegister(Edit->getReg())) {
      DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n');
      forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def));
      forceRecompute(RegIdx, *Edit->getParent().getVNInfoAt(Def));
    } else {
      SlotIndex Kill = LIS.getInstructionIndex(*MBBI).getRegSlot();
      DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI);
@@ -982,7 +981,7 @@ void SplitEditor::computeRedundantBackCopies(
      }
    }
    if (!DominatedVNIs.empty()) {
      forceRecompute(0, ParentVNI);
      forceRecompute(0, *ParentVNI);
      for (auto VNI : DominatedVNIs) {
        BackCopies.push_back(VNI);
      }
@@ -1102,7 +1101,7 @@ void SplitEditor::hoistCopies() {
        NotToHoistSet.count(ParentVNI->id))
      continue;
    BackCopies.push_back(VNI);
    forceRecompute(0, ParentVNI);
    forceRecompute(0, *ParentVNI);
  }

  // If it is not beneficial to hoist all the BackCopies, simply remove
@@ -1428,6 +1427,41 @@ void SplitEditor::deleteRematVictims() {
  Edit->eliminateDeadDefs(Dead, None, &AA);
}

void SplitEditor::forceRecomputeVNI(const VNInfo &ParentVNI) {
  // Fast-path for common case.
  if (!ParentVNI.isPHIDef()) {
    for (unsigned I = 0, E = Edit->size(); I != E; ++I)
      forceRecompute(I, ParentVNI);
    return;
  }

  // Trace value through phis.
  SmallPtrSet<const VNInfo *, 8> Visited; ///< whether VNI was/is in worklist.
  SmallVector<const VNInfo *, 4> WorkList;
  Visited.insert(&ParentVNI);
  WorkList.push_back(&ParentVNI);

  const LiveInterval &ParentLI = Edit->getParent();
  const SlotIndexes &Indexes = *LIS.getSlotIndexes();
  do {
    const VNInfo &VNI = *WorkList.back();
    WorkList.pop_back();
    for (unsigned I = 0, E = Edit->size(); I != E; ++I)
      forceRecompute(I, VNI);
    if (!VNI.isPHIDef())
      continue;

    MachineBasicBlock &MBB = *Indexes.getMBBFromIndex(VNI.def);
    for (const MachineBasicBlock *Pred : MBB.predecessors()) {
      SlotIndex PredEnd = Indexes.getMBBEndIdx(Pred);
      VNInfo *PredVNI = ParentLI.getVNInfoBefore(PredEnd);
      assert(PredVNI && "Value available in PhiVNI predecessor");
      if (Visited.insert(PredVNI).second)
        WorkList.push_back(PredVNI);
    }
  } while (!WorkList.empty());
}

void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
  ++NumFinished;

@@ -1444,8 +1478,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
    // Force rematted values to be recomputed everywhere.
    // The new live ranges may be truncated.
    if (Edit->didRematerialize(ParentVNI))
      for (unsigned i = 0, e = Edit->size(); i != e; ++i)
        forceRecompute(i, ParentVNI);
      forceRecomputeVNI(*ParentVNI);
  }

  // Hoist back-copies to the complement interval when in spill mode.

@@ -357,7 +357,11 @@ private:
  /// recomputed by LiveRangeCalc::extend regardless of the number of defs.
  /// This is used for values whose live range doesn't match RegAssign exactly.
  /// They could have rematerialized, or back-copies may have been moved.
  void forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI);
  void forceRecompute(unsigned RegIdx, const VNInfo &ParentVNI);

  /// Calls forceRecompute() on any affected regidx and on ParentVNI
  /// predecessors in case of a phi definition.
  void forceRecomputeVNI(const VNInfo &ParentVNI);

  /// defFromParent - Define Reg from ParentVNI at UseIdx using either
  /// rematerialization or a COPY from parent. Return the new value.

@@ -75,7 +75,6 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
      Name=="ssse3.pabs.d.128" || // Added in 6.0
      Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
      Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
      Name.startswith("avx512.kunpck") || //added in 6.0
      Name.startswith("avx2.pabs.") || // Added in 6.0
      Name.startswith("avx512.mask.pabs.") || // Added in 6.0
      Name.startswith("avx512.broadcastm") || // Added in 6.0
@@ -1063,12 +1062,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
      Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
      uint64_t Shift = CI->getType()->getScalarSizeInBits() / 2;
      uint64_t And = (1ULL << Shift) - 1;
      Value* LowBits = Builder.CreateAnd(CI->getArgOperand(0), And);
      Value* HighBits = Builder.CreateShl(CI->getArgOperand(1), Shift);
      Rep = Builder.CreateOr(LowBits, HighBits);
    } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
      Type *I32Ty = Type::getInt32Ty(C);
      Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),

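[Editor's sketch, not part of the commit: the replacement IR that UpgradeIntrinsicCall emits above for the retired avx512.kunpck.* intrinsics is a plain mask, shift, and or. Its scalar equivalent for the byte/word variant, with a hypothetical function name:]

  #include <stdint.h>

  // Scalar model of the upgrade for llvm.x86.avx512.kunpck.bw: Shift is
  // half the scalar width, And masks the low half of the first operand,
  // and the second operand supplies the high half.
  static inline uint16_t kunpck_bw_model(uint16_t A, uint16_t B) {
    const unsigned Shift = 16 / 2;            // getScalarSizeInBits() / 2
    const uint16_t And = (uint16_t)((1u << Shift) - 1);
    return (uint16_t)((A & And) | (uint16_t)(B << Shift));
  }
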
@@ -83,27 +83,6 @@ namespace {
typedef std::vector<AsmToken> MCAsmMacroArgument;
typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;

struct MCAsmMacroParameter {
  StringRef Name;
  MCAsmMacroArgument Value;
  bool Required = false;
  bool Vararg = false;

  MCAsmMacroParameter() = default;
};

typedef std::vector<MCAsmMacroParameter> MCAsmMacroParameters;

struct MCAsmMacro {
  StringRef Name;
  StringRef Body;
  MCAsmMacroParameters Parameters;

public:
  MCAsmMacro(StringRef N, StringRef B, MCAsmMacroParameters P)
      : Name(N), Body(B), Parameters(std::move(P)) {}
};

/// \brief Helper class for storing information about an active macro
/// instantiation.
struct MacroInstantiation {
@@ -164,9 +143,6 @@ private:
  /// addDirectiveHandler.
  StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;

  /// \brief Map of currently defined macros.
  StringMap<MCAsmMacro> MacroMap;

  /// \brief Stack of active macro instantiations.
  std::vector<MacroInstantiation*> ActiveMacros;

@@ -308,17 +284,6 @@ private:
  /// \brief Control a flag in the parser that enables or disables macros.
  void setMacrosEnabled(bool Flag) {MacrosEnabledFlag = Flag;}

  /// \brief Lookup a previously defined macro.
  /// \param Name Macro name.
  /// \returns Pointer to macro. NULL if no such macro was defined.
  const MCAsmMacro* lookupMacro(StringRef Name);

  /// \brief Define a new macro with the given name and information.
  void defineMacro(StringRef Name, MCAsmMacro Macro);

  /// \brief Undefine a macro. If no such macro was defined, it's a no-op.
  void undefineMacro(StringRef Name);

  /// \brief Are we inside a macro instantiation?
  bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}

@@ -1841,7 +1806,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,

  // If macros are enabled, check to see if this is a macro instantiation.
  if (areMacrosEnabled())
    if (const MCAsmMacro *M = lookupMacro(IDVal)) {
    if (const MCAsmMacro *M = getContext().lookupMacro(IDVal)) {
      return handleMacroEntry(M, IDLoc);
    }

@@ -2720,17 +2685,6 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M,
  return TokError("too many positional arguments");
}

const MCAsmMacro *AsmParser::lookupMacro(StringRef Name) {
  StringMap<MCAsmMacro>::iterator I = MacroMap.find(Name);
  return (I == MacroMap.end()) ? nullptr : &I->getValue();
}

void AsmParser::defineMacro(StringRef Name, MCAsmMacro Macro) {
  MacroMap.insert(std::make_pair(Name, std::move(Macro)));
}

void AsmParser::undefineMacro(StringRef Name) { MacroMap.erase(Name); }

bool AsmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
  // Arbitrarily limit macro nesting depth (default matches 'as'). We can
  // eliminate this, although we should protect against infinite loops.
@@ -4249,7 +4203,7 @@ bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc) {
    eatToEndOfStatement();
  }

  if (lookupMacro(Name)) {
  if (getContext().lookupMacro(Name)) {
    return Error(DirectiveLoc, "macro '" + Name + "' is already defined");
  }

@@ -4257,7 +4211,7 @@ bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc) {
  const char *BodyEnd = EndToken.getLoc().getPointer();
  StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
  checkForBadMacro(DirectiveLoc, Name, Body, Parameters);
  defineMacro(Name, MCAsmMacro(Name, Body, std::move(Parameters)));
  getContext().defineMacro(Name, MCAsmMacro(Name, Body, std::move(Parameters)));
  return false;
}

@@ -4416,10 +4370,10 @@ bool AsmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) {
                   "unexpected token in '.purgem' directive"))
    return true;

  if (!lookupMacro(Name))
  if (!getContext().lookupMacro(Name))
    return Error(DirectiveLoc, "macro '" + Name + "' is not defined");

  undefineMacro(Name);
  getContext().undefineMacro(Name);
  return false;
}

@@ -3957,6 +3957,10 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
  NODE_NAME_CASE(CVT_F32_UBYTE2)
  NODE_NAME_CASE(CVT_F32_UBYTE3)
  NODE_NAME_CASE(CVT_PKRTZ_F16_F32)
  NODE_NAME_CASE(CVT_PKNORM_I16_F32)
  NODE_NAME_CASE(CVT_PKNORM_U16_F32)
  NODE_NAME_CASE(CVT_PK_I16_I32)
  NODE_NAME_CASE(CVT_PK_U16_U32)
  NODE_NAME_CASE(FP_TO_FP16)
  NODE_NAME_CASE(FP16_ZEXT)
  NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)

@@ -417,6 +417,10 @@ enum NodeType : unsigned {
  // Convert two float 32 numbers into a single register holding two packed f16
  // with round to zero.
  CVT_PKRTZ_F16_F32,
  CVT_PKNORM_I16_F32,
  CVT_PKNORM_U16_F32,
  CVT_PK_I16_I32,
  CVT_PK_U16_U32,

  // Same as the standard node, except the high bits of the resulting integer
  // are known 0.

@@ -108,3 +108,21 @@ int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {

  return MCOp;
}

// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (const Argument *Arg = dyn_cast<Argument>(Ptr))
    return AMDGPU::isArgPassedInSGPR(Arg);

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

@@ -50,6 +50,8 @@ public:
  /// Return -1 if the target-specific opcode for the pseudo instruction does
  /// not exist. If Opcode is not a pseudo instruction, this is identity.
  int pseudoToMCOpcode(int Opcode) const;

  static bool isUniformMMO(const MachineMemOperand *MMO);
};
} // End llvm namespace

@@ -35,6 +35,10 @@ def AMDGPUFPPackOp : SDTypeProfile<1, 2,
  [SDTCisFP<1>, SDTCisSameAs<1, 2>]
>;

def AMDGPUIntPackOp : SDTypeProfile<1, 2,
  [SDTCisInt<1>, SDTCisSameAs<1, 2>]
>;

def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
  [SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
>;
@@ -142,6 +146,10 @@ def AMDGPUrsq_clamp : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>;
def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;

def AMDGPUpkrtz_f16_f32 : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>;
def AMDGPUpknorm_i16_f32 : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>;
def AMDGPUpknorm_u16_f32 : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>;
def AMDGPUpk_i16_i32 : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>;
def AMDGPUpk_u16_u32 : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>;
def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16" , SDTFPToIntOp>;
def AMDGPUfp16_zext : SDNode<"AMDGPUISD::FP16_ZEXT" , SDTFPToIntOp>;

@@ -120,7 +120,7 @@ static bool isInstrUniform(const MachineInstr &MI) {
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  return AMDGPU::isUniformMMO(MMO);
  return AMDGPUInstrInfo::isUniformMMO(MMO);
}

const RegisterBankInfo::InstructionMapping &

@@ -205,6 +205,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2i16, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom);

  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -1085,7 +1086,7 @@ bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS,
bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
  const MemSDNode *MemNode = cast<MemSDNode>(N);

  return AMDGPU::isUniformMMO(MemNode->getMemOperand());
  return AMDGPUInstrInfo::isUniformMMO(MemNode->getMemOperand());
}

TargetLoweringBase::LegalizeTypeAction
@@ -3517,7 +3518,8 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    if (IID == Intrinsic::amdgcn_cvt_pkrtz) {
    switch (IID) {
    case Intrinsic::amdgcn_cvt_pkrtz: {
      SDValue Src0 = N->getOperand(1);
      SDValue Src1 = N->getOperand(2);
      SDLoc SL(N);
@@ -3526,6 +3528,29 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
      Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Cvt));
      return;
    }
    case Intrinsic::amdgcn_cvt_pknorm_i16:
    case Intrinsic::amdgcn_cvt_pknorm_u16:
    case Intrinsic::amdgcn_cvt_pk_i16:
    case Intrinsic::amdgcn_cvt_pk_u16: {
      SDValue Src0 = N->getOperand(1);
      SDValue Src1 = N->getOperand(2);
      SDLoc SL(N);
      unsigned Opcode;

      if (IID == Intrinsic::amdgcn_cvt_pknorm_i16)
        Opcode = AMDGPUISD::CVT_PKNORM_I16_F32;
      else if (IID == Intrinsic::amdgcn_cvt_pknorm_u16)
        Opcode = AMDGPUISD::CVT_PKNORM_U16_F32;
      else if (IID == Intrinsic::amdgcn_cvt_pk_i16)
        Opcode = AMDGPUISD::CVT_PK_I16_I32;
      else
        Opcode = AMDGPUISD::CVT_PK_U16_U32;

      SDValue Cvt = DAG.getNode(Opcode, SL, MVT::i32, Src0, Src1);
      Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, Cvt));
      return;
    }
    }
    break;
  }
  case ISD::SELECT: {
@@ -4424,10 +4449,27 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  case Intrinsic::amdgcn_ubfe:
    return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT,
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::amdgcn_cvt_pkrtz: {
    // FIXME: Stop adding cast if v2f16 legal.
  case Intrinsic::amdgcn_cvt_pkrtz:
  case Intrinsic::amdgcn_cvt_pknorm_i16:
  case Intrinsic::amdgcn_cvt_pknorm_u16:
  case Intrinsic::amdgcn_cvt_pk_i16:
  case Intrinsic::amdgcn_cvt_pk_u16: {
    // FIXME: Stop adding cast if v2f16/v2i16 are legal.
    EVT VT = Op.getValueType();
    SDValue Node = DAG.getNode(AMDGPUISD::CVT_PKRTZ_F16_F32, DL, MVT::i32,
    unsigned Opcode;

    if (IntrinsicID == Intrinsic::amdgcn_cvt_pkrtz)
      Opcode = AMDGPUISD::CVT_PKRTZ_F16_F32;
    else if (IntrinsicID == Intrinsic::amdgcn_cvt_pknorm_i16)
      Opcode = AMDGPUISD::CVT_PKNORM_I16_F32;
    else if (IntrinsicID == Intrinsic::amdgcn_cvt_pknorm_u16)
      Opcode = AMDGPUISD::CVT_PKNORM_U16_F32;
    else if (IntrinsicID == Intrinsic::amdgcn_cvt_pk_i16)
      Opcode = AMDGPUISD::CVT_PK_I16_I32;
    else
      Opcode = AMDGPUISD::CVT_PK_U16_U32;

    SDValue Node = DAG.getNode(Opcode, DL, MVT::i32,
                               Op.getOperand(1), Op.getOperand(2));
    return DAG.getNode(ISD::BITCAST, DL, VT, Node);
  }

@@ -871,24 +871,6 @@ bool isArgPassedInSGPR(const Argument *A) {
  }
}

// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
bool isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (const Argument *Arg = dyn_cast<Argument>(Ptr))
    return isArgPassedInSGPR(Arg);

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isGCN3Encoding(ST))
    return ByteOffset;

@@ -363,7 +363,6 @@ LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

bool isArgPassedInSGPR(const Argument *Arg);
bool isUniformMMO(const MachineMemOperand *MMO);

/// \returns The encoding that will be used for \p ByteOffset in the SMRD
/// offset field.

@@ -407,11 +407,11 @@ defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT<VOP_I32_I32
defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>;
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>;
defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_I32_F32_F32>>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_I32_F32_F32>>;
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpknorm_i16_f32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpknorm_u16_f32>;
defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpkrtz_f16_f32>;
defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_I32_I32_I32>>;
defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_I32_I32_I32>>;
defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_I32_I32_I32>, AMDGPUpk_u16_u32>;
defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_I32_I32_I32>, AMDGPUpk_i16_i32>;

} // End SubtargetPredicate = isGCN

@@ -396,10 +396,14 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,

      // rip-relative addressing is actually relative to the *next* instruction.
      // Since an immediate can follow the mod/rm byte for an instruction, this
      // means that we need to bias the immediate field of the instruction with
      // the size of the immediate field. If we have this case, add it into the
      // means that we need to bias the displacement field of the instruction with
      // the size of the immediate field. If we have this case, add it into the
      // expression to emit.
      int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0;
      // Note: rip-relative addressing using immediate displacement values should
      // not be adjusted, assuming it was the user's intent.
      int ImmSize = !Disp.isImm() && X86II::hasImm(TSFlags)
                        ? X86II::getSizeOfImm(TSFlags)
                        : 0;

      EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind),
                    CurByte, OS, Fixups, -ImmSize);

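[Editor's illustration of the hunk above, not from the patch; the instructions and symbol are hypothetical:]

  // For "cmpl $42, foo(%rip)", the 4-byte immediate 42 sits between the
  // displacement and the end of the instruction, so the fixup for foo
  // must be biased by -4 (RIP points past the immediate). For
  // "cmpl $42, 16(%rip)", the displacement is itself a literal immediate
  // (Disp.isImm()), which the new code deliberately leaves unbiased.
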
@@ -370,6 +370,8 @@ static void printIntelMemReference(X86AsmPrinter &P, const MachineInstr *MI,
static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
                              char Mode, raw_ostream &O) {
  unsigned Reg = MO.getReg();
  bool EmitPercent = true;

  switch (Mode) {
  default: return true; // Unknown mode.
  case 'b': // Print QImode register
@@ -384,6 +386,9 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
  case 'k': // Print SImode register
    Reg = getX86SubSuperRegister(Reg, 32);
    break;
  case 'V':
    EmitPercent = false;
    LLVM_FALLTHROUGH;
  case 'q':
    // Print 64-bit register names if 64-bit integer registers are available.
    // Otherwise, print 32-bit register names.
@@ -391,7 +396,10 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
    break;
  }

  O << '%' << X86ATTInstPrinter::getRegisterName(Reg);
  if (EmitPercent)
    O << '%';

  O << X86ATTInstPrinter::getRegisterName(Reg);
  return false;
}

@@ -464,6 +472,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
  case 'w': // Print HImode register
  case 'k': // Print SImode register
  case 'q': // Print DImode register
  case 'V': // Print native register without '%'
    if (MO.isReg())
      return printAsmMRegister(*this, MO, ExtraCode[0], O);
    printOperand(*this, MI, OpNo, O);

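[Editor's sketch of what the new 'V' operand modifier enables, not from the patch. The thunk symbol matches the external-thunk names defined elsewhere in this import; the function itself is hypothetical, and this usage pattern is an assumption:]

  // '%V0' prints the native register name with no '%' prefix, so the
  // template can splice the register into a symbol name: with the
  // operand allocated to %rax, this expands to
  // "call __x86_indirect_thunk_rax".
  void call_through_thunk(void (*Fn)(void)) {
    asm volatile("call __x86_indirect_thunk_%V0" : : "r"(Fn) : "memory");
  }
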
@@ -663,8 +663,10 @@ void X86DomainReassignment::initConverters() {
    createReplacer(X86::XOR32rr, X86::KXORDrr);
    createReplacer(X86::XOR64rr, X86::KXORQrr);

    createReplacer(X86::TEST32rr, X86::KTESTDrr);
    createReplacer(X86::TEST64rr, X86::KTESTQrr);
    // TODO: KTEST is not a replacement for TEST due to flag differences. Need
    // to prove only Z flag is used.
    //createReplacer(X86::TEST32rr, X86::KTESTDrr);
    //createReplacer(X86::TEST64rr, X86::KTESTQrr);
  }

  if (STI->hasDQI()) {
@@ -684,8 +686,10 @@ void X86DomainReassignment::initConverters() {
    createReplacer(X86::SHR8ri, X86::KSHIFTRBri);
    createReplacer(X86::SHL8ri, X86::KSHIFTLBri);

    createReplacer(X86::TEST8rr, X86::KTESTBrr);
    createReplacer(X86::TEST16rr, X86::KTESTWrr);
    // TODO: KTEST is not a replacement for TEST due to flag differences. Need
    // to prove only Z flag is used.
    //createReplacer(X86::TEST8rr, X86::KTESTBrr);
    //createReplacer(X86::TEST16rr, X86::KTESTWrr);

    createReplacer(X86::XOR8rr, X86::KXORBrr);
  }

@@ -17017,24 +17017,6 @@ static bool hasNonFlagsUse(SDValue Op) {
  return false;
}

// Emit KTEST instruction for bit vectors on AVX-512
static SDValue EmitKTEST(SDValue Op, SelectionDAG &DAG,
                         const X86Subtarget &Subtarget) {
  if (Op.getOpcode() == ISD::BITCAST) {
    auto hasKTEST = [&](MVT VT) {
      unsigned SizeInBits = VT.getSizeInBits();
      return (Subtarget.hasDQI() && (SizeInBits == 8 || SizeInBits == 16)) ||
             (Subtarget.hasBWI() && (SizeInBits == 32 || SizeInBits == 64));
    };
    SDValue Op0 = Op.getOperand(0);
    MVT Op0VT = Op0.getValueType().getSimpleVT();
    if (Op0VT.isVector() && Op0VT.getVectorElementType() == MVT::i1 &&
        hasKTEST(Op0VT))
      return DAG.getNode(X86ISD::KTEST, SDLoc(Op), Op0VT, Op0, Op0);
  }
  return SDValue();
}

/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
@@ -17079,9 +17061,6 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
    // doing a separate TEST. TEST always sets OF and CF to 0, so unless
    // we prove that the arithmetic won't overflow, we can't use OF or CF.
    if (Op.getResNo() != 0 || NeedOF || NeedCF) {
      // Emit KTEST for bit vectors
      if (auto Node = EmitKTEST(Op, DAG, Subtarget))
        return Node;
      // Emit a CMP with 0, which is the TEST pattern.
      return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
                         DAG.getConstant(0, dl, Op.getValueType()));
@@ -17310,10 +17289,6 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
  }

  if (Opcode == 0) {
    // Emit KTEST for bit vectors
    if (auto Node = EmitKTEST(Op, DAG, Subtarget))
      return Node;

    // Emit a CMP with 0, which is the TEST pattern.
    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
                       DAG.getConstant(0, dl, Op.getValueType()));
@@ -18093,6 +18068,34 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
  return Result;
}

// Try to select this as a KTEST+SETCC if possible.
static SDValue EmitKTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                         const SDLoc &dl, SelectionDAG &DAG,
                         const X86Subtarget &Subtarget) {
  // Only support equality comparisons.
  if (CC != ISD::SETEQ && CC != ISD::SETNE)
    return SDValue();

  // Must be a bitcast from vXi1.
  if (Op0.getOpcode() != ISD::BITCAST)
    return SDValue();

  Op0 = Op0.getOperand(0);
  MVT VT = Op0.getSimpleValueType();
  if (!(Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1)) &&
      !(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)))
    return SDValue();

  X86::CondCode X86CC;
  if (isNullConstant(Op1)) {
    X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
  } else
    return SDValue();

  SDValue KTEST = DAG.getNode(X86ISD::KTEST, dl, MVT::i32, Op0, Op0);
  return getSETCC(X86CC, KTEST, dl, DAG);
}

SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {

  MVT VT = Op.getSimpleValueType();
@@ -18115,6 +18118,10 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
    return NewSetCC;
  }

  // Try to lower using KTEST.
  if (SDValue NewSetCC = EmitKTEST(Op0, Op1, CC, dl, DAG, Subtarget))
    return NewSetCC;

  // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
  // these.
  if ((isOneConstant(Op1) || isNullConstant(Op1)) &&

@@ -20525,6 +20532,18 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
      Mask = DAG.getBitcast(MaskVT, Mask);
      return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Mask);
    }
    case KUNPCK: {
      MVT VT = Op.getSimpleValueType();
      MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()/2);

      SDValue Src1 = getMaskNode(Op.getOperand(1), MaskVT, Subtarget, DAG, dl);
      SDValue Src2 = getMaskNode(Op.getOperand(2), MaskVT, Subtarget, DAG, dl);
      // Arguments should be swapped.
      SDValue Res = DAG.getNode(IntrData->Opc0, dl,
                                MVT::getVectorVT(MVT::i1, VT.getSizeInBits()),
                                Src2, Src1);
      return DAG.getBitcast(VT, Res);
    }
    case MASK_BINOP: {
      MVT VT = Op.getSimpleValueType();
      MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits());

@@ -27094,28 +27113,57 @@ static unsigned getOpcodeForRetpoline(unsigned RPOpc) {

static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
                                      unsigned Reg) {
  if (Subtarget.useRetpolineExternalThunk()) {
    // When using an external thunk for retpolines, we pick names that match the
    // names GCC happens to use as well. This helps simplify the implementation
    // of the thunks for kernels where they have no easy ability to create
    // aliases and are doing non-trivial configuration of the thunk's body. For
    // example, the Linux kernel will do boot-time hot patching of the thunk
    // bodies and cannot easily export aliases of these to loaded modules.
    //
    // Note that at any point in the future, we may need to change the semantics
    // of how we implement retpolines and at that time will likely change the
    // name of the called thunk. Essentially, there is no hard guarantee that
    // LLVM will generate calls to specific thunks, we merely make a best-effort
    // attempt to help out kernels and other systems where duplicating the
    // thunks is costly.
    switch (Reg) {
    case X86::EAX:
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__x86_indirect_thunk_eax";
    case X86::ECX:
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__x86_indirect_thunk_ecx";
    case X86::EDX:
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__x86_indirect_thunk_edx";
    case X86::EDI:
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__x86_indirect_thunk_edi";
    case X86::R11:
      assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
      return "__x86_indirect_thunk_r11";
    }
    llvm_unreachable("unexpected reg for retpoline");
  }

  // When targeting an internal COMDAT thunk use an LLVM-specific name.
  switch (Reg) {
  case 0:
    assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
    return Subtarget.useRetpolineExternalThunk()
               ? "__llvm_external_retpoline_push"
               : "__llvm_retpoline_push";
  case X86::EAX:
    return Subtarget.useRetpolineExternalThunk()
               ? "__llvm_external_retpoline_eax"
               : "__llvm_retpoline_eax";
    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
    return "__llvm_retpoline_eax";
  case X86::ECX:
    return Subtarget.useRetpolineExternalThunk()
               ? "__llvm_external_retpoline_ecx"
               : "__llvm_retpoline_ecx";
    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
    return "__llvm_retpoline_ecx";
  case X86::EDX:
    return Subtarget.useRetpolineExternalThunk()
               ? "__llvm_external_retpoline_edx"
               : "__llvm_retpoline_edx";
    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
    return "__llvm_retpoline_edx";
  case X86::EDI:
    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
    return "__llvm_retpoline_edi";
  case X86::R11:
    return Subtarget.useRetpolineExternalThunk()
               ? "__llvm_external_retpoline_r11"
               : "__llvm_retpoline_r11";
    assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
    return "__llvm_retpoline_r11";
  }
  llvm_unreachable("unexpected reg for retpoline");
}
@@ -27134,15 +27182,13 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
  // just use R11, but we scan for uses anyway to ensure we don't generate
  // incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't
  // already a register use operand to the call to hold the callee. If none
  // are available, push the callee instead. This is less efficient, but is
  // necessary for functions using 3 regparms. Such function calls are
  // (currently) not eligible for tail call optimization, because there is no
  // scratch register available to hold the address of the callee.
  // are available, use EDI instead. EDI is chosen because EBX is the PIC base
  // register and ESI is the base pointer to realigned stack frames with VLAs.
  SmallVector<unsigned, 3> AvailableRegs;
  if (Subtarget.is64Bit())
    AvailableRegs.push_back(X86::R11);
  else
    AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX});
    AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI});

  // Zero out any registers that are already used.
  for (const auto &MO : MI.operands()) {
@@ -27160,30 +27206,18 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
      break;
    }
  }
  if (!AvailableReg)
    report_fatal_error("calling convention incompatible with retpoline, no "
                       "available registers");

  const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);

  if (AvailableReg == 0) {
    // No register available. Use PUSH. This must not be a tailcall, and this
    // must not be x64.
    if (Subtarget.is64Bit())
      report_fatal_error(
          "Cannot make an indirect call on x86-64 using both retpoline and a "
          "calling convention that preservers r11");
    if (Opc != X86::CALLpcrel32)
      report_fatal_error("Cannot make an indirect tail call on x86 using "
                         "retpoline without a preserved register");
    BuildMI(*BB, MI, DL, TII->get(X86::PUSH32r)).addReg(CalleeVReg);
    MI.getOperand(0).ChangeToES(Symbol);
    MI.setDesc(TII->get(Opc));
  } else {
    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
        .addReg(CalleeVReg);
    MI.getOperand(0).ChangeToES(Symbol);
    MI.setDesc(TII->get(Opc));
    MachineInstrBuilder(*BB->getParent(), &MI)
        .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
  }
  BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
      .addReg(CalleeVReg);
  MI.getOperand(0).ChangeToES(Symbol);
  MI.setDesc(TII->get(Opc));
  MachineInstrBuilder(*BB->getParent(), &MI)
      .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
  return BB;
}

@@ -30432,53 +30466,6 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
  SDValue N0 = BitCast.getOperand(0);
  EVT VecVT = N0->getValueType(0);

  if (VT.isVector() && VecVT.isScalarInteger() && Subtarget.hasAVX512() &&
      N0->getOpcode() == ISD::OR) {
    SDValue Op0 = N0->getOperand(0);
    SDValue Op1 = N0->getOperand(1);
    MVT TrunckVT;
    MVT BitcastVT;
    switch (VT.getSimpleVT().SimpleTy) {
    default:
      return SDValue();
    case MVT::v16i1:
      TrunckVT = MVT::i8;
      BitcastVT = MVT::v8i1;
      break;
    case MVT::v32i1:
      TrunckVT = MVT::i16;
      BitcastVT = MVT::v16i1;
      break;
    case MVT::v64i1:
      TrunckVT = MVT::i32;
      BitcastVT = MVT::v32i1;
      break;
    }
    bool isArg0UndefRight = Op0->getOpcode() == ISD::SHL;
    bool isArg0UndefLeft =
        Op0->getOpcode() == ISD::ZERO_EXTEND || Op0->getOpcode() == ISD::AND;
    bool isArg1UndefRight = Op1->getOpcode() == ISD::SHL;
    bool isArg1UndefLeft =
        Op1->getOpcode() == ISD::ZERO_EXTEND || Op1->getOpcode() == ISD::AND;
    SDValue OpLeft;
    SDValue OpRight;
    if (isArg0UndefRight && isArg1UndefLeft) {
      OpLeft = Op0;
      OpRight = Op1;
    } else if (isArg1UndefRight && isArg0UndefLeft) {
      OpLeft = Op1;
      OpRight = Op0;
    } else
      return SDValue();
    SDLoc DL(BitCast);
    SDValue Shr = OpLeft->getOperand(0);
    SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, TrunckVT, Shr);
    SDValue Bitcast1 = DAG.getBitcast(BitcastVT, Trunc1);
    SDValue Trunc2 = DAG.getNode(ISD::TRUNCATE, DL, TrunckVT, OpRight);
    SDValue Bitcast2 = DAG.getBitcast(BitcastVT, Trunc2);
    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Bitcast1, Bitcast2);
  }

  if (!VT.isScalarInteger() || !VecVT.isSimple())
    return SDValue();

@@ -36,7 +36,7 @@ enum IntrinsicType : uint16_t {
  COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
  TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
  EXPAND_FROM_MEM,
  TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
  TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
  FIXUPIMMS_MASKZ, CONVERT_TO_MASK, GATHER_AVX2, MASK_BINOP,
  ROUNDP, ROUNDS
};
@@ -479,6 +479,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
  X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
  X86_INTRINSIC_DATA(avx512_kand_w, MASK_BINOP, ISD::AND, 0),
  X86_INTRINSIC_DATA(avx512_kor_w, MASK_BINOP, ISD::OR, 0),
  X86_INTRINSIC_DATA(avx512_kunpck_bw, KUNPCK, ISD::CONCAT_VECTORS, 0),
  X86_INTRINSIC_DATA(avx512_kunpck_dq, KUNPCK, ISD::CONCAT_VECTORS, 0),
  X86_INTRINSIC_DATA(avx512_kunpck_wd, KUNPCK, ISD::CONCAT_VECTORS, 0),
  X86_INTRINSIC_DATA(avx512_kxor_w, MASK_BINOP, ISD::XOR, 0),
  X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD,
                     X86ISD::FADD_RND),

@ -43,7 +43,7 @@ static const char R11ThunkName[] = "__llvm_retpoline_r11";
|
||||
static const char EAXThunkName[] = "__llvm_retpoline_eax";
|
||||
static const char ECXThunkName[] = "__llvm_retpoline_ecx";
|
||||
static const char EDXThunkName[] = "__llvm_retpoline_edx";
|
||||
static const char PushThunkName[] = "__llvm_retpoline_push";
|
||||
static const char EDIThunkName[] = "__llvm_retpoline_edi";
|
||||
|
||||
namespace {
|
||||
class X86RetpolineThunks : public MachineFunctionPass {
|
||||
@ -74,7 +74,6 @@ private:
|
||||
|
||||
void createThunkFunction(Module &M, StringRef Name);
|
||||
void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
|
||||
void insert32BitPushReturnAddrClobber(MachineBasicBlock &MBB);
|
||||
void populateThunk(MachineFunction &MF, Optional<unsigned> Reg = None);
|
||||
};
|
||||
|
||||
@ -127,7 +126,7 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
|
||||
createThunkFunction(M, R11ThunkName);
|
||||
else
|
||||
for (StringRef Name :
|
||||
{EAXThunkName, ECXThunkName, EDXThunkName, PushThunkName})
|
||||
{EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName})
|
||||
createThunkFunction(M, Name);
|
||||
InsertedThunks = true;
|
||||
return true;
|
||||
@ -151,9 +150,8 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
|
||||
populateThunk(MF, X86::R11);
|
||||
} else {
|
||||
// For 32-bit targets we need to emit a collection of thunks for various
|
||||
// possible scratch registers as well as a fallback that is used when
|
||||
// there are no scratch registers and assumes the retpoline target has
|
||||
// been pushed.
|
||||
// possible scratch registers as well as a fallback that uses EDI, which is
|
||||
// normally callee saved.
|
||||
// __llvm_retpoline_eax:
|
||||
// calll .Leax_call_target
|
||||
// .Leax_capture_spec:
|
||||
@ -174,32 +172,18 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
|
||||
// movl %edx, (%esp)
|
||||
// retl
|
||||
//
|
||||
// This last one is a bit more special and so needs a little extra
|
||||
// handling.
|
||||
// __llvm_retpoline_push:
|
||||
// calll .Lpush_call_target
|
||||
// .Lpush_capture_spec:
|
||||
// pause
|
||||
// lfence
|
||||
// jmp .Lpush_capture_spec
|
||||
// .align 16
|
||||
// .Lpush_call_target:
|
||||
// # Clear pause_loop return address.
|
||||
// addl $4, %esp
|
||||
// # Top of stack words are: Callee, RA. Exchange Callee and RA.
|
||||
// pushl 4(%esp) # Push callee
|
||||
// pushl 4(%esp) # Push RA
|
||||
// popl 8(%esp) # Pop RA to final RA
|
||||
// popl (%esp) # Pop callee to next top of stack
|
||||
// retl # Ret to callee
|
||||
// __llvm_retpoline_edi:
|
||||
// ... # Same setup
|
||||
// movl %edi, (%esp)
|
||||
// retl
|
||||
if (MF.getName() == EAXThunkName)
|
||||
populateThunk(MF, X86::EAX);
|
||||
else if (MF.getName() == ECXThunkName)
|
||||
populateThunk(MF, X86::ECX);
|
||||
else if (MF.getName() == EDXThunkName)
|
||||
populateThunk(MF, X86::EDX);
|
||||
else if (MF.getName() == PushThunkName)
|
||||
populateThunk(MF);
|
||||
else if (MF.getName() == EDIThunkName)
|
||||
populateThunk(MF, X86::EDI);
|
||||
else
|
||||
llvm_unreachable("Invalid thunk name on x86-32!");
|
||||
}
@ -240,31 +224,6 @@ void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
      .addReg(Reg);
}

void X86RetpolineThunks::insert32BitPushReturnAddrClobber(
    MachineBasicBlock &MBB) {
  // The instruction sequence we use to replace the return address without
  // a scratch register is somewhat complicated:
  //         # Clear capture_spec from return address.
  //         addl $4, %esp
  //         # Top of stack words are: Callee, RA. Exchange Callee and RA.
  //         pushl 4(%esp)  # Push callee
  //         pushl 4(%esp)  # Push RA
  //         popl 8(%esp)   # Pop RA to final RA
  //         popl (%esp)    # Pop callee to next top of stack
  //         retl           # Ret to callee
  BuildMI(&MBB, DebugLoc(), TII->get(X86::ADD32ri), X86::ESP)
      .addReg(X86::ESP)
      .addImm(4);
  addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::PUSH32rmm)), X86::ESP,
               false, 4);
  addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::PUSH32rmm)), X86::ESP,
               false, 4);
  addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::POP32rmm)), X86::ESP,
               false, 8);
  addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::POP32rmm)), X86::ESP,
               false, 0);
}

void X86RetpolineThunks::populateThunk(MachineFunction &MF,
                                       Optional<unsigned> Reg) {
  // Set MF properties. We never use vregs...
@ -301,11 +260,6 @@ void X86RetpolineThunks::populateThunk(MachineFunction &MF,
  CaptureSpec->addSuccessor(CaptureSpec);

  CallTarget->setAlignment(4);
  if (Reg) {
    insertRegReturnAddrClobber(*CallTarget, *Reg);
  } else {
    assert(!Is64Bit && "We only support non-reg thunks on 32-bit x86!");
    insert32BitPushReturnAddrClobber(*CallTarget);
  }
  insertRegReturnAddrClobber(*CallTarget, *Reg);
  BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}
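
// For reference, an editorial sketch (not part of this diff) of the full
// 32-bit register thunk that populateThunk produces for the EAX case,
// assembled from the comments above:
//
// __llvm_retpoline_eax:
//         calll .Leax_call_target
// .Leax_capture_spec:
//         pause
//         lfence
//         jmp .Leax_capture_spec
//         .align 16
// .Leax_call_target:
//         movl %eax, (%esp)   # overwrite the speculated return address
//         retl                # "return" to the real target held in %eax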
@ -3264,6 +3264,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {

    break;
  }
  case Intrinsic::amdgcn_cvt_pknorm_i16:
  case Intrinsic::amdgcn_cvt_pknorm_u16:
  case Intrinsic::amdgcn_cvt_pk_i16:
  case Intrinsic::amdgcn_cvt_pk_u16: {
    Value *Src0 = II->getArgOperand(0);
    Value *Src1 = II->getArgOperand(1);

    if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1))
      return replaceInstUsesWith(*II, UndefValue::get(II->getType()));

    break;
  }
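  // An IR-level illustration of the fold above (an editorial sketch, not
  // taken from this commit): with both operands undef,
  //   %v = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 undef)
  // folds to undef, since packing two undefined values cannot produce
  // anything more defined.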
  case Intrinsic::amdgcn_ubfe:
  case Intrinsic::amdgcn_sbfe: {
    // Decompose simple cases into standard shifts.
test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.i16.ll (new file, 84 lines)
@ -0,0 +1,84 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s

; GCN-LABEL: {{^}}s_cvt_pk_i16_i32:
; GCN-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x{{b|2c}}
; GCN-DAG: s_load_dword [[SY:s[0-9]+]], s[0:1], 0x{{c|30}}
; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], [[SY]]
; SI: v_cvt_pk_i16_i32_e32 v{{[0-9]+}}, [[X]], [[VY]]
; VI: v_cvt_pk_i16_i32 v{{[0-9]+}}, [[X]], [[VY]]
define amdgpu_kernel void @s_cvt_pk_i16_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
  %result = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %x, i32 %y)
  %r = bitcast <2 x i16> %result to i32
  store i32 %r, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}s_cvt_pk_i16_samereg_i32:
; GCN: s_load_dword [[X:s[0-9]+]]
; GCN: v_cvt_pk_i16_i32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]]
define amdgpu_kernel void @s_cvt_pk_i16_samereg_i32(i32 addrspace(1)* %out, i32 %x) #0 {
  %result = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %x, i32 %x)
  %r = bitcast <2 x i16> %result to i32
  store i32 %r, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pk_i16_i32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; SI: v_cvt_pk_i16_i32_e32 v{{[0-9]+}}, [[A]], [[B]]
; VI: v_cvt_pk_i16_i32 v{{[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @v_cvt_pk_i16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile i32, i32 addrspace(1)* %a.gep
  %b = load volatile i32, i32 addrspace(1)* %b.gep
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %a, i32 %b)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pk_i16_i32_reg_imm:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_pk_i16_i32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1
define amdgpu_kernel void @v_cvt_pk_i16_i32_reg_imm(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile i32, i32 addrspace(1)* %a.gep
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %a, i32 1)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pk_i16_i32_imm_reg:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; SI: v_cvt_pk_i16_i32_e32 v{{[0-9]+}}, 1, [[A]]
; VI: v_cvt_pk_i16_i32 v{{[0-9]+}}, 1, [[A]]
define amdgpu_kernel void @v_cvt_pk_i16_i32_imm_reg(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile i32, i32 addrspace(1)* %a.gep
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 1, i32 %a)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

declare <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32, i32) #1
declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.u16.ll (new file, 84 lines)
@ -0,0 +1,84 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s

; GCN-LABEL: {{^}}s_cvt_pk_u16_u32:
; GCN-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x{{b|2c}}
; GCN-DAG: s_load_dword [[SY:s[0-9]+]], s[0:1], 0x{{c|30}}
; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], [[SY]]
; SI: v_cvt_pk_u16_u32_e32 v{{[0-9]+}}, [[X]], [[VY]]
; VI: v_cvt_pk_u16_u32 v{{[0-9]+}}, [[X]], [[VY]]
define amdgpu_kernel void @s_cvt_pk_u16_u32(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
  %result = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %x, i32 %y)
  %r = bitcast <2 x i16> %result to i32
  store i32 %r, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}s_cvt_pk_u16_samereg_i32:
; GCN: s_load_dword [[X:s[0-9]+]]
; GCN: v_cvt_pk_u16_u32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]]
define amdgpu_kernel void @s_cvt_pk_u16_samereg_i32(i32 addrspace(1)* %out, i32 %x) #0 {
  %result = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %x, i32 %x)
  %r = bitcast <2 x i16> %result to i32
  store i32 %r, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pk_u16_u32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; SI: v_cvt_pk_u16_u32_e32 v{{[0-9]+}}, [[A]], [[B]]
; VI: v_cvt_pk_u16_u32 v{{[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @v_cvt_pk_u16_u32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile i32, i32 addrspace(1)* %a.gep
  %b = load volatile i32, i32 addrspace(1)* %b.gep
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %a, i32 %b)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pk_u16_u32_reg_imm:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_pk_u16_u32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1
define amdgpu_kernel void @v_cvt_pk_u16_u32_reg_imm(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile i32, i32 addrspace(1)* %a.gep
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %a, i32 1)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pk_u16_u32_imm_reg:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; SI: v_cvt_pk_u16_u32_e32 v{{[0-9]+}}, 1, [[A]]
; VI: v_cvt_pk_u16_u32 v{{[0-9]+}}, 1, [[A]]
define amdgpu_kernel void @v_cvt_pk_u16_u32_imm_reg(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile i32, i32 addrspace(1)* %a.gep
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 1, i32 %a)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

declare <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32, i32) #1
declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.i16.ll (new file, 164 lines)
@ -0,0 +1,164 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s

; GCN-LABEL: {{^}}s_cvt_pknorm_i16_f32:
; GCN-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x{{b|2c}}
; GCN-DAG: s_load_dword [[SY:s[0-9]+]], s[0:1], 0x{{c|30}}
; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], [[SY]]
; SI: v_cvt_pknorm_i16_f32_e32 v{{[0-9]+}}, [[X]], [[VY]]
; VI: v_cvt_pknorm_i16_f32 v{{[0-9]+}}, [[X]], [[VY]]
define amdgpu_kernel void @s_cvt_pknorm_i16_f32(i32 addrspace(1)* %out, float %x, float %y) #0 {
  %result = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %x, float %y)
  %r = bitcast <2 x i16> %result to i32
  store i32 %r, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}s_cvt_pknorm_i16_samereg_f32:
; GCN: s_load_dword [[X:s[0-9]+]]
; GCN: v_cvt_pknorm_i16_f32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]]
define amdgpu_kernel void @s_cvt_pknorm_i16_samereg_f32(i32 addrspace(1)* %out, float %x) #0 {
  %result = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %x, float %x)
  %r = bitcast <2 x i16> %result to i32
  store i32 %r, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pknorm_i16_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; SI: v_cvt_pknorm_i16_f32_e32 v{{[0-9]+}}, [[A]], [[B]]
; VI: v_cvt_pknorm_i16_f32 v{{[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @v_cvt_pknorm_i16_f32(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %a, float %b)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pknorm_i16_f32_reg_imm:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_pknorm_i16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1.0
define amdgpu_kernel void @v_cvt_pknorm_i16_f32_reg_imm(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %a, float 1.0)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pknorm_i16_f32_imm_reg:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; SI: v_cvt_pknorm_i16_f32_e32 v{{[0-9]+}}, 1.0, [[A]]
; VI: v_cvt_pknorm_i16_f32 v{{[0-9]+}}, 1.0, [[A]]
define amdgpu_kernel void @v_cvt_pknorm_i16_f32_imm_reg(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float 1.0, float %a)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pknorm_i16_f32_fneg_lo:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pknorm_i16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], [[B]]
define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_lo(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %neg.a = fsub float -0.0, %a
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %neg.a, float %b)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pknorm_i16_f32_fneg_hi:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pknorm_i16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], -[[B]]
define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_hi(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %neg.b = fsub float -0.0, %b
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %a, float %neg.b)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pknorm_i16_f32_fneg_lo_hi:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pknorm_i16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], -[[B]]
define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_lo_hi(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %neg.a = fsub float -0.0, %a
  %neg.b = fsub float -0.0, %b
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %neg.a, float %neg.b)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pknorm_i16_f32_fneg_fabs_lo_fneg_hi:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pknorm_i16_f32{{(_e64)*}} v{{[0-9]+}}, -|[[A]]|, -[[B]]
define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_fabs_lo_fneg_hi(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fabs.a = call float @llvm.fabs.f32(float %a)
  %neg.fabs.a = fsub float -0.0, %fabs.a
  %neg.b = fsub float -0.0, %b
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %neg.fabs.a, float %neg.b)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

declare <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float, float) #1
declare float @llvm.fabs.f32(float) #1
declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
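
; Editorial note, not part of the imported test: "%neg.a = fsub float -0.0, %a"
; is the canonical IR spelling of fneg at this LLVM version, and checks such as
; "-|[[A]]|" above verify that the backend folds the fneg/fabs into the
; instruction's neg/abs source modifiers rather than emitting separate bit
; operations on the sign bit.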
test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.u16.ll (new file, 164 lines)
@ -0,0 +1,164 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s

; GCN-LABEL: {{^}}s_cvt_pknorm_u16_f32:
; GCN-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x{{b|2c}}
; GCN-DAG: s_load_dword [[SY:s[0-9]+]], s[0:1], 0x{{c|30}}
; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], [[SY]]
; SI: v_cvt_pknorm_u16_f32_e32 v{{[0-9]+}}, [[X]], [[VY]]
; VI: v_cvt_pknorm_u16_f32 v{{[0-9]+}}, [[X]], [[VY]]
define amdgpu_kernel void @s_cvt_pknorm_u16_f32(i32 addrspace(1)* %out, float %x, float %y) #0 {
  %result = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %x, float %y)
  %r = bitcast <2 x i16> %result to i32
  store i32 %r, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}s_cvt_pknorm_u16_samereg_f32:
; GCN: s_load_dword [[X:s[0-9]+]]
; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]]
define amdgpu_kernel void @s_cvt_pknorm_u16_samereg_f32(i32 addrspace(1)* %out, float %x) #0 {
  %result = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %x, float %x)
  %r = bitcast <2 x i16> %result to i32
  store i32 %r, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; SI: v_cvt_pknorm_u16_f32_e32 v{{[0-9]+}}, [[A]], [[B]]
; VI: v_cvt_pknorm_u16_f32 v{{[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @v_cvt_pknorm_u16_f32(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %a, float %b)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_reg_imm:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1.0
define amdgpu_kernel void @v_cvt_pknorm_u16_f32_reg_imm(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %a, float 1.0)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_imm_reg:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; SI: v_cvt_pknorm_u16_f32_e32 v{{[0-9]+}}, 1.0, [[A]]
; VI: v_cvt_pknorm_u16_f32 v{{[0-9]+}}, 1.0, [[A]]
define amdgpu_kernel void @v_cvt_pknorm_u16_f32_imm_reg(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float 1.0, float %a)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_fneg_lo:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], [[B]]
define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_lo(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %neg.a = fsub float -0.0, %a
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %neg.a, float %b)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_fneg_hi:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], -[[B]]
define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_hi(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %neg.b = fsub float -0.0, %b
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %a, float %neg.b)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_fneg_lo_hi:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], -[[B]]
define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_lo_hi(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %neg.a = fsub float -0.0, %a
  %neg.b = fsub float -0.0, %b
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %neg.a, float %neg.b)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_fneg_fabs_lo_fneg_hi:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, -|[[A]]|, -[[B]]
define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_fabs_lo_fneg_hi(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fabs.a = call float @llvm.fabs.f32(float %a)
  %neg.fabs.a = fsub float -0.0, %fabs.a
  %neg.b = fsub float -0.0, %b
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %neg.fabs.a, float %neg.b)
  %r = bitcast <2 x i16> %cvt to i32
  store i32 %r, i32 addrspace(1)* %out.gep
  ret void
}

declare <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float, float) #1
declare float @llvm.fabs.f32(float) #1
declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
@ -1,4 +1,4 @@
; RUN: llc < %s
; RUN: llc -verify-machineinstrs < %s
; PR25838

target triple = "armv7--linux-android"
test/CodeGen/ARM/splitkit.ll (new file, 245 lines)
@ -0,0 +1,245 @@
; RUN: llc -o - %s | FileCheck %s
; Make sure RegAllocGreedy/SplitKit do not produce invalid liveness information
; and crash when splitting a liverange twice and rematerializing each time.
; (Sorry for the testcase; this was run through bugpoint and then manually
; reduced for several hours but is still big...)
target triple = "thumbv7-apple-ios"

%struct.ham = type { %struct.wombat.0 }
%struct.wombat.0 = type { %struct.barney }
%struct.barney = type { %struct.snork.1 }
%struct.snork.1 = type { %struct.wobble.2 }
%struct.wobble.2 = type { %struct.blam }
%struct.blam = type { i32, i32, i8* }
%struct.ham.3 = type { %struct.pluto }
%struct.pluto = type { %struct.zot*, %struct.snork.5, %struct.wibble }
%struct.zot = type { %struct.blam.4* }
%struct.blam.4 = type <{ %struct.zot, %struct.blam.4*, %struct.zot*, i8, [3 x i8] }>
%struct.snork.5 = type { %struct.quux }
%struct.quux = type { %struct.zot }
%struct.wibble = type { %struct.widget }
%struct.widget = type { i32 }
%struct.bar = type { %struct.spam }
%struct.spam = type { %struct.zot*, %struct.wobble, %struct.zot.7 }
%struct.wobble = type { %struct.wibble.6 }
%struct.wibble.6 = type { %struct.zot }
%struct.zot.7 = type { %struct.ham.8 }
%struct.ham.8 = type { i32 }
%struct.hoge = type { %struct.ham, %struct.foo }
%struct.foo = type { float, float }
%struct.wombat = type { %struct.ham, float }
%struct.snork = type { %struct.ham.9, [11 x i8] }
%struct.ham.9 = type { i8 }

@global = external global i8
@global.1 = private constant [20 x i8] c"aaaaaaaaaaaaaaaaaa0\00"
@global.2 = external constant [27 x i8]
@global.3 = external global %struct.ham
@global.4 = external constant [47 x i8]
@global.5 = external constant [61 x i8]
@global.6 = external constant [40 x i8]
@global.7 = external constant [24 x i8]
@global.8 = external constant [20 x i8]
@global.9 = external global %struct.ham
@global.10 = external global %struct.ham
@global.11 = external global %struct.ham
@global.12 = external global %struct.ham
@global.13 = external global %struct.ham
@global.14 = external global %struct.ham
@global.15 = external global %struct.ham
@global.16 = external global %struct.ham
@global.17 = external global %struct.ham
@global.18 = external constant [35 x i8]
@global.19 = external global %struct.ham
@global.20 = external constant [53 x i8]
@global.21 = external global %struct.ham
@global.22 = external global %struct.ham
@global.23 = external global %struct.ham
@global.24 = external constant [32 x i8]
@global.25 = external global %struct.ham
@global.26 = external constant [47 x i8]
@global.27 = external global %struct.ham
@global.28 = external constant [45 x i8]
@global.29 = external global %struct.ham
@global.30 = external global %struct.ham
@global.31 = external constant [24 x i8]
@global.32 = external global %struct.ham
@global.33 = external global %struct.ham
@global.34 = external global %struct.ham
@global.35 = external global %struct.ham
@global.36 = external constant [27 x i8]
@global.37 = external global %struct.ham
@global.38 = external constant [10 x i8]
@global.39 = external global %struct.ham
@global.40 = external global %struct.ham
@global.41 = external global %struct.ham
@global.42 = external global %struct.ham
@global.43 = external global %struct.ham
@global.44 = external constant [41 x i8]
@global.45 = external global %struct.ham
@global.46 = external global %struct.ham
@global.47 = external global %struct.ham
@global.48 = external global %struct.ham
@global.49 = external constant [52 x i8]
@global.50 = external constant [47 x i8]
@global.51 = external global %struct.ham
@global.52 = external global %struct.ham
@global.53 = external global %struct.ham
@global.54 = external global %struct.ham
@global.55 = external global %struct.ham.3
@global.56 = external global %struct.bar
@global.57 = external global i8

declare %struct.ham* @bar(%struct.ham* returned)

declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)

declare %struct.ham* @wobble(%struct.ham* returned, %struct.ham* )

declare i32 @quux(...)

declare i8* @_Znwm(i32)

declare i32 @wobble.58(%struct.pluto*, [1 x i32], %struct.ham* , %struct.hoge* )

declare i32 @widget(%struct.spam*, [1 x i32], %struct.ham* , %struct.wombat* )

; Just check we didn't crash and did output something...
; CHECK-LABEL: func:
; CHECK: trap
define internal void @func() section "__TEXT,__StaticInit,regular,pure_instructions" personality i32 (...)* @quux {
  %tmp = tail call i32 @__cxa_atexit(void (i8*)* bitcast (%struct.ham* (%struct.ham*)* @bar to void (i8*)*), i8* bitcast (%struct.ham* @global.3 to i8*), i8* @global) #0
  %tmp2 = invoke %struct.ham* @wobble(%struct.ham* undef, %struct.ham* @global.9)
          to label %bb14 unwind label %bbunwind

bb14:
  %tmp15 = getelementptr i8, i8* undef, i32 12
  store i8 0, i8* %tmp15
  %tmp16 = icmp eq i8 undef, 0
  br i1 %tmp16, label %bb28, label %bb18

bb18:
  br i1 undef, label %bb21, label %bb29

bb21:
  %tmp22 = call i8* @_Znwm(i32 16)
  store i32 17, i32* getelementptr (%struct.ham, %struct.ham* @global.10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  %tmp23 = call i8* @_Znwm(i32 32)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 undef, i8* align 1 getelementptr ([27 x i8], [27 x i8]* @global.2, i32 0, i32 0), i32 26, i32 1, i1 false)
  store i32 33, i32* getelementptr (%struct.ham, %struct.ham* @global.11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  store i32 23, i32* getelementptr (%struct.ham, %struct.ham* @global.11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 undef, i8* align 1 getelementptr ([24 x i8], [24 x i8]* @global.7, i32 0, i32 0), i32 23, i32 1, i1 false)
  %tmp24 = call i32 @__cxa_atexit(void (i8*)* bitcast (%struct.ham* (%struct.ham*)* @bar to void (i8*)*), i8* bitcast (%struct.ham* @global.11 to i8*), i8* @global) #0
  store i32 49, i32* getelementptr (%struct.ham, %struct.ham* @global.12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  store i32 37, i32* getelementptr (%struct.ham, %struct.ham* @global.13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1)
  call void @llvm.memset.p0i8.i32(i8* align 4 bitcast (%struct.ham* @global.14 to i8*), i8 0, i32 12, i32 1, i1 false)
  %tmp25 = call i8* @_Znwm(i32 48)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %tmp25, i8* align 1 getelementptr ([40 x i8], [40 x i8]* @global.6, i32 0, i32 0), i32 39, i32 1, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 undef, i8* align 1 getelementptr ([47 x i8], [47 x i8]* @global.4, i32 0, i32 0), i32 46, i32 1, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 undef, i8* align 1 getelementptr ([61 x i8], [61 x i8]* @global.5, i32 0, i32 0), i32 60, i32 1, i1 false)
  %tmp26 = call i8* @_Znwm(i32 48)
  store i32 65, i32* getelementptr (%struct.ham, %struct.ham* @global.15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  %tmp27 = icmp eq i8 undef, 0
  br i1 %tmp27, label %bb30, label %bb33

bb28:
  call void @llvm.trap()
  unreachable

bb29:
  call void @llvm.trap()
  unreachable

bb30:
  %tmp31 = icmp eq i32 undef, 37
  br i1 %tmp31, label %bb32, label %bb30

bb32:
  store i8 1, i8* @global.57
  br label %bb33

bb33:
  %tmp34 = call i8* @_Znwm(i32 32)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 undef, i8* align 1 getelementptr ([20 x i8], [20 x i8]* @global.1, i32 0, i32 0), i32 19, i32 1, i1 false)
  store i32 17, i32* getelementptr (%struct.ham, %struct.ham* @global.16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  store i32 65, i32* getelementptr (%struct.ham, %struct.ham* @global.17, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 undef, i8* align 1 getelementptr ([35 x i8], [35 x i8]* @global.18, i32 0, i32 0), i32 34, i32 1, i1 false)
  store i32 65, i32* getelementptr (%struct.ham, %struct.ham* @global.19, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 undef, i8* align 1 getelementptr ([53 x i8], [53 x i8]* @global.20, i32 0, i32 0), i32 52, i32 1, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 undef, i8* align 1 getelementptr ([20 x i8], [20 x i8]* @global.8, i32 0, i32 0), i32 19, i32 1, i1 false)
  store i32 37, i32* getelementptr (%struct.ham, %struct.ham* @global.21, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1)
  %tmp35 = call i8* @_Znwm(i32 32)
  store i8 16, i8* bitcast (%struct.ham* @global.22 to i8*)
  %tmp36 = call i8* @_Znwm(i32 32)
  store i32 31, i32* getelementptr (%struct.ham, %struct.ham* @global.23, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %tmp36, i8* align 1 getelementptr ([32 x i8], [32 x i8]* @global.24, i32 0, i32 0), i32 31, i32 1, i1 false)
  %tmp37 = getelementptr i8, i8* %tmp36, i32 31
  store i8 0, i8* %tmp37
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 undef, i8* align 1 getelementptr ([47 x i8], [47 x i8]* @global.26, i32 0, i32 0), i32 46, i32 1, i1 false)
  %tmp38 = call i32 @__cxa_atexit(void (i8*)* bitcast (%struct.ham* (%struct.ham*)* @bar to void (i8*)*), i8* bitcast (%struct.ham* @global.25 to i8*), i8* @global) #0
  %tmp39 = call i8* @_Znwm(i32 48)
  store i32 44, i32* getelementptr (%struct.ham, %struct.ham* @global.27, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %tmp39, i8* align 1 getelementptr ([45 x i8], [45 x i8]* @global.28, i32 0, i32 0), i32 44, i32 1, i1 false)
  %tmp40 = getelementptr i8, i8* %tmp39, i32 44
  store i8 0, i8* %tmp40
  call void @llvm.memset.p0i8.i32(i8* align 4 bitcast (%struct.ham* @global.29 to i8*), i8 0, i32 12, i32 1, i1 false)
  %tmp41 = call i8* @_Znwm(i32 32)
  store i32 23, i32* getelementptr (%struct.ham, %struct.ham* @global.30, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %tmp41, i8* align 1 getelementptr ([24 x i8], [24 x i8]* @global.31, i32 0, i32 0), i32 23, i32 1, i1 false)
  %tmp42 = getelementptr i8, i8* %tmp41, i32 23
  store i8 0, i8* %tmp42
  call void @llvm.memset.p0i8.i32(i8* align 4 bitcast (%struct.ham* @global.32 to i8*), i8 0, i32 12, i32 1, i1 false)
  store i8 16, i8* bitcast (%struct.ham* @global.32 to i8*)
  %tmp43 = call i32 @__cxa_atexit(void (i8*)* bitcast (%struct.ham* (%struct.ham*)* @bar to void (i8*)*), i8* bitcast (%struct.ham* @global.33 to i8*), i8* @global) #0
  %tmp44 = call i8* @_Znwm(i32 16)
  call void @llvm.memset.p0i8.i32(i8* align 4 bitcast (%struct.ham* @global.34 to i8*), i8 0, i32 12, i32 1, i1 false)
  call void @llvm.memset.p0i8.i32(i8* align 4 bitcast (%struct.ham* @global.9 to i8*), i8 0, i32 12, i32 1, i1 false)
  %tmp45 = call i8* @_Znwm(i32 32)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %tmp45, i8* align 1 getelementptr ([27 x i8], [27 x i8]* @global.36, i32 0, i32 0), i32 26, i32 1, i1 false)
  call void @llvm.memset.p0i8.i32(i8* align 4 bitcast (%struct.ham* @global.37 to i8*), i8 0, i32 12, i32 1, i1 false)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr (%struct.snork, %struct.snork* bitcast (%struct.ham* @global.37 to %struct.snork*), i32 0, i32 1, i32 0), i8* align 1 getelementptr ([10 x i8], [10 x i8]* @global.38, i32 0, i32 0), i32 9, i32 1, i1 false)
  store i32 17, i32* getelementptr (%struct.ham, %struct.ham* @global.39, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  %tmp46 = call i32 @__cxa_atexit(void (i8*)* bitcast (%struct.ham* (%struct.ham*)* @bar to void (i8*)*), i8* bitcast (%struct.ham* @global.40 to i8*), i8* @global) #0
  %tmp47 = call i8* @_Znwm(i32 32)
  %tmp48 = getelementptr i8, i8* %tmp47, i32 21
  store i8 0, i8* %tmp48
  store i32 33, i32* getelementptr (%struct.ham, %struct.ham* @global.41, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  store i32 15, i32* getelementptr (%struct.ham, %struct.ham* @global.42, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1)
  %tmp49 = call i32 @__cxa_atexit(void (i8*)* bitcast (%struct.ham* (%struct.ham*)* @bar to void (i8*)*), i8* bitcast (%struct.ham* @global.43 to i8*), i8* @global) #0
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 undef, i8* align 1 getelementptr ([41 x i8], [41 x i8]* @global.44, i32 0, i32 0), i32 40, i32 1, i1 false)
  %tmp50 = call i32 @__cxa_atexit(void (i8*)* bitcast (%struct.ham* (%struct.ham*)* @bar to void (i8*)*), i8* bitcast (%struct.ham* @global.45 to i8*), i8* @global) #0
  %tmp51 = call i32 @__cxa_atexit(void (i8*)* bitcast (%struct.ham* (%struct.ham*)* @bar to void (i8*)*), i8* bitcast (%struct.ham* @global.46 to i8*), i8* @global) #0
  %tmp52 = call i8* @_Znwm(i32 32)
  store i8* %tmp52, i8** getelementptr (%struct.ham, %struct.ham* @global.47, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 undef, i8* align 1 getelementptr ([52 x i8], [52 x i8]* @global.49, i32 0, i32 0), i32 51, i32 1, i1 false)
  %tmp53 = call i32 @__cxa_atexit(void (i8*)* bitcast (%struct.ham* (%struct.ham*)* @bar to void (i8*)*), i8* bitcast (%struct.ham* @global.48 to i8*), i8* @global) #0
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 undef, i8* align 1 getelementptr ([47 x i8], [47 x i8]* @global.50, i32 0, i32 0), i32 46, i32 1, i1 false)
  store i32 33, i32* getelementptr (%struct.ham, %struct.ham* @global.51, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  store i32 37, i32* getelementptr (%struct.ham, %struct.ham* @global.52, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1)
  %tmp54 = invoke %struct.ham* @wobble(%struct.ham* undef, %struct.ham* @global.54)
          to label %bb58 unwind label %bbunwind

bb58:
  %tmp59 = invoke i32 @wobble.58(%struct.pluto* getelementptr (%struct.ham.3, %struct.ham.3* @global.55, i32 0, i32 0), [1 x i32] [i32 ptrtoint (%struct.zot* getelementptr (%struct.ham.3, %struct.ham.3* @global.55, i32 0, i32 0, i32 1, i32 0, i32 0) to i32)], %struct.ham* undef, %struct.hoge* undef)
          to label %bb71 unwind label %bbunwind

bb71:
  %tmp72 = invoke i32 @widget(%struct.spam* getelementptr (%struct.bar, %struct.bar* @global.56, i32 0, i32 0), [1 x i32] [i32 ptrtoint (%struct.zot* getelementptr (%struct.bar, %struct.bar* @global.56, i32 0, i32 0, i32 1, i32 0, i32 0) to i32)], %struct.ham* undef, %struct.wombat* undef)
          to label %bb73 unwind label %bbunwind

bb73:
  ret void

bbunwind:
  %tmp75 = landingpad { i8*, i32 }
          cleanup
  resume { i8*, i32 } undef
}

declare void @llvm.trap()

declare void @llvm.memcpy.p0i8.p0i8.i32(i8* , i8* , i32, i32, i1)

declare void @llvm.memset.p0i8.i32(i8* , i8, i32, i32, i1)

attributes #0 = { nounwind }
test/CodeGen/Thumb/stm-scavenging.ll (new file, 46 lines)
@ -0,0 +1,46 @@
; RUN: llc < %s | FileCheck %s
target triple = "thumbv6---gnueabi"

; Use STM to save the three registers
; CHECK-LABEL: use_stm:
; CHECK: .save {r7, lr}
; CHECK: .setfp r7, sp
; CHECK: stm r3!, {r0, r1, r2}
; CHECK: bl throws_1
define void @use_stm(i32 %a, i32 %b, i32 %c, i32* %d) local_unnamed_addr noreturn "no-frame-pointer-elim"="true" {
entry:
  %arrayidx = getelementptr inbounds i32, i32* %d, i32 2
  store i32 %a, i32* %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %d, i32 3
  store i32 %b, i32* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %d, i32 4
  store i32 %c, i32* %arrayidx2, align 4
  tail call void @throws_1(i32 %a, i32 %b, i32 %c) noreturn
  unreachable
}

; Don't use STM: there is no available register to store
; the address. We could transform this with some extra math, but
; that currently isn't implemented.
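; A hypothetical sketch of that extra math (assumed, not from this commit):
; since r3 still holds %d after the stores, the base could be advanced and
; then rewound around the stm, e.g.
;   adds r3, r3, #8         @ point at d[2]
;   stm  r3!, {r0, r1, r2}  @ writeback leaves r3 = d + 20
;   subs r3, r3, #20        @ restore r3 = d for the call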
; CHECK-LABEL: no_stm:
; CHECK: .save {r7, lr}
; CHECK: .setfp r7, sp
; CHECK: str r0,
; CHECK: str r1,
; CHECK: str r2,
; CHECK: bl throws_2
define void @no_stm(i32 %a, i32 %b, i32 %c, i32* %d) local_unnamed_addr noreturn "no-frame-pointer-elim"="true" {
entry:
  %arrayidx = getelementptr inbounds i32, i32* %d, i32 2
  store i32 %a, i32* %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %d, i32 3
  store i32 %b, i32* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %d, i32 4
  store i32 %c, i32* %arrayidx2, align 4
  tail call void @throws_2(i32 %a, i32 %b, i32 %c, i32* %d) noreturn
  unreachable
}

declare void @throws_1(i32, i32, i32) noreturn
declare void @throws_2(i32, i32, i32, i32*) noreturn
@ -5,59 +5,6 @@
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512f-builtins.c

define zeroext i16 @test_mm512_kunpackb(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) local_unnamed_addr #0 {
; X32-LABEL: test_mm512_kunpackb:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: andl $-64, %esp
; X32-NEXT: subl $64, %esp
; X32-NEXT: vmovdqa64 136(%ebp), %zmm3
; X32-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; X32-NEXT: vpcmpneqd 8(%ebp), %zmm2, %k1
; X32-NEXT: kunpckbw %k0, %k1, %k1
; X32-NEXT: vpcmpneqd 72(%ebp), %zmm3, %k0 {%k1}
; X32-NEXT: kmovw %k0, %eax
; X32-NEXT: movzwl %ax, %eax
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_kunpackb:
; X64: # %bb.0: # %entry
; X64-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; X64-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; X64-NEXT: kunpckbw %k0, %k1, %k1
; X64-NEXT: vpcmpneqd %zmm5, %zmm4, %k0 {%k1}
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = icmp ne <16 x i32> %0, %1
  %3 = bitcast <16 x i1> %2 to i16
  %4 = bitcast <8 x i64> %__C to <16 x i32>
  %5 = bitcast <8 x i64> %__D to <16 x i32>
  %6 = icmp ne <16 x i32> %4, %5
  %7 = bitcast <16 x i1> %6 to i16
  %8 = and i16 %7, 255
  %shl.i = shl i16 %3, 8
  %or.i = or i16 %8, %shl.i
  %9 = bitcast <8 x i64> %__E to <16 x i32>
  %10 = bitcast <8 x i64> %__F to <16 x i32>
  %11 = icmp ne <16 x i32> %9, %10
  %12 = bitcast i16 %or.i to <16 x i1>
  %13 = and <16 x i1> %11, %12
  %14 = bitcast <16 x i1> %13 to i16
  ret i16 %14
}

define <16 x float> @test_mm512_shuffle_f32x4(<16 x float> %__A, <16 x float> %__B) {
; X32-LABEL: test_mm512_shuffle_f32x4:
; X32: # %bb.0: # %entry
@ -1,20 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone

define i16 @unpckbw_test(i16 %a0, i16 %a1) {
; CHECK-LABEL: unpckbw_test:
; CHECK: ## %bb.0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: shll $8, %esi
; CHECK-NEXT: orl %esi, %eax
; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax
; CHECK-NEXT: retq
  %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
  ret i16 %res
}

define <16 x i32>@test_int_x86_avx512_mask_pbroadcastd_gpr_512(i32 %x0, <16 x i32> %x1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
; CHECK: ## %bb.0:
@ -96,6 +96,21 @@ define i16 @test_kor(i16 %a0, i16 %a1) {
  ret i16 %t2
}

declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone

define i16 @unpckbw_test(i16 %a0, i16 %a1) {
; CHECK-LABEL: unpckbw_test:
; CHECK: ## %bb.0:
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: kunpckbw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: def %ax killed %ax killed %eax
; CHECK-NEXT: retq
  %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
  ret i16 %res
}
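
; For context (an editorial note, not stated in this diff): kunpckbw packs
; the low byte of each source mask into one 16-bit mask, roughly
;   result = ((src_hi & 0xff) << 8) | (src_lo & 0xff)
; The relocated CHECK lines above show the intrinsic now selecting the mask
; instruction directly, where the test removed from avx512-intrinsics.ll had
; checked a movzbl/shll/orl expansion through general-purpose registers.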

declare i16 @llvm.x86.avx512.kxnor.w(i16, i16) nounwind readnone
; TODO: the two kxnor instructions here are a no-op and should be eliminated,
; probably by FoldConstantArithmetic in SelectionDAG.
@ -2775,3 +2775,99 @@ define i8 @test_v8i1_mul(i8 %x, i8 %y) {
  %ret = bitcast <8 x i1> %m2 to i8
  ret i8 %ret
}

; Make sure we don't emit a ktest for signed comparisons.
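; (Explanatory note, not part of the imported test: KTEST only produces ZF
; and CF, so a signed predicate such as the icmp sgt at the end of this
; function cannot be answered from its flags; the mask is moved to a GPR
; and tested with testw instead, as the CHECK lines below expect.)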
define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
; KNL-LABEL: ktest_signed:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rax
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: vporq %zmm1, %zmm0, %zmm0
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testw %ax, %ax
; KNL-NEXT: jle LBB63_1
; KNL-NEXT: ## %bb.2: ## %bb.2
; KNL-NEXT: popq %rax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
; KNL-NEXT: LBB63_1: ## %bb.1
; KNL-NEXT: vzeroupper
; KNL-NEXT: callq _foo
; KNL-NEXT: popq %rax
; KNL-NEXT: retq
;
; SKX-LABEL: ktest_signed:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rax
; SKX-NEXT: .cfi_def_cfa_offset 16
; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testw %ax, %ax
; SKX-NEXT: jle LBB63_1
; SKX-NEXT: ## %bb.2: ## %bb.2
; SKX-NEXT: popq %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; SKX-NEXT: LBB63_1: ## %bb.1
; SKX-NEXT: vzeroupper
; SKX-NEXT: callq _foo
; SKX-NEXT: popq %rax
; SKX-NEXT: retq
;
; AVX512BW-LABEL: ktest_signed:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: pushq %rax
; AVX512BW-NEXT: .cfi_def_cfa_offset 16
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testw %ax, %ax
; AVX512BW-NEXT: jle LBB63_1
; AVX512BW-NEXT: ## %bb.2: ## %bb.2
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-NEXT: LBB63_1: ## %bb.1
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: callq _foo
; AVX512BW-NEXT: popq %rax
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: ktest_signed:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: pushq %rax
; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: testw %ax, %ax
; AVX512DQ-NEXT: jle LBB63_1
; AVX512DQ-NEXT: ## %bb.2: ## %bb.2
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
; AVX512DQ-NEXT: LBB63_1: ## %bb.1
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: callq _foo
; AVX512DQ-NEXT: popq %rax
; AVX512DQ-NEXT: retq
  %a = icmp eq <16 x i32> %x, zeroinitializer
  %b = icmp eq <16 x i32> %y, zeroinitializer
  %c = and <16 x i1> %a, %b
  %d = bitcast <16 x i1> %c to i16
  %e = icmp sgt i16 %d, 0
  br i1 %e, label %bb.2, label %bb.1
bb.1:
  call void @foo()
  br label %bb.2
bb.2:
  ret void
}
declare void @foo()
@ -4,117 +4,6 @@
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512bw-builtins.c

define i64 @test_mm512_kunpackd(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) {
; X32-LABEL: test_mm512_kunpackd:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: andl $-64, %esp
; X32-NEXT: subl $64, %esp
; X32-NEXT: vmovdqa64 136(%ebp), %zmm3
; X32-NEXT: vmovdqa64 72(%ebp), %zmm4
; X32-NEXT: vmovdqa64 8(%ebp), %zmm5
; X32-NEXT: vpcmpneqb %zmm0, %zmm1, %k0
; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; X32-NEXT: vpcmpneqb %zmm5, %zmm2, %k0
; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X32-NEXT: kunpckdq %k0, %k1, %k1
; X32-NEXT: vpcmpneqb %zmm3, %zmm4, %k0 {%k1}
; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_kunpackd:
; X64: # %bb.0: # %entry
; X64-NEXT: vpcmpneqb %zmm0, %zmm1, %k0
; X64-NEXT: vpcmpneqb %zmm3, %zmm2, %k1
; X64-NEXT: kunpckdq %k0, %k1, %k1
; X64-NEXT: vpcmpneqb %zmm5, %zmm4, %k0 {%k1}
; X64-NEXT: kmovq %k0, %rax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__B to <64 x i8>
  %1 = bitcast <8 x i64> %__A to <64 x i8>
  %2 = icmp ne <64 x i8> %0, %1
  %3 = bitcast <64 x i1> %2 to i64
  %4 = bitcast <8 x i64> %__C to <64 x i8>
  %5 = bitcast <8 x i64> %__D to <64 x i8>
  %6 = icmp ne <64 x i8> %4, %5
  %7 = bitcast <64 x i1> %6 to i64
  %and.i = and i64 %7, 4294967295
  %shl.i = shl i64 %3, 32
  %or.i = or i64 %and.i, %shl.i
  %8 = bitcast <8 x i64> %__E to <64 x i8>
  %9 = bitcast <8 x i64> %__F to <64 x i8>
  %10 = icmp ne <64 x i8> %8, %9
  %11 = bitcast i64 %or.i to <64 x i1>
  %12 = and <64 x i1> %10, %11
  %13 = bitcast <64 x i1> %12 to i64
  ret i64 %13
}
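; kunpackd builds a 64-bit mask from two 32-bit halves: the A/B compare
; supplies the high half (%shl.i) and the C/D compare the low half (%and.i);
; the combined mask then predicates the E/F compare.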

define i32 @test_mm512_kunpackw(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) {
; X32-LABEL: test_mm512_kunpackw:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: andl $-64, %esp
; X32-NEXT: subl $64, %esp
; X32-NEXT: vmovdqa64 136(%ebp), %zmm3
; X32-NEXT: vpcmpneqw %zmm0, %zmm1, %k0
; X32-NEXT: vpcmpneqw 8(%ebp), %zmm2, %k1
; X32-NEXT: kunpckwd %k0, %k1, %k1
; X32-NEXT: vpcmpneqw 72(%ebp), %zmm3, %k0 {%k1}
; X32-NEXT: kmovd %k0, %eax
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_kunpackw:
; X64: # %bb.0: # %entry
; X64-NEXT: vpcmpneqw %zmm0, %zmm1, %k0
; X64-NEXT: vpcmpneqw %zmm3, %zmm2, %k1
; X64-NEXT: kunpckwd %k0, %k1, %k1
; X64-NEXT: vpcmpneqw %zmm5, %zmm4, %k0 {%k1}
; X64-NEXT: kmovd %k0, %eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__B to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = icmp ne <32 x i16> %0, %1
  %3 = bitcast <32 x i1> %2 to i32
  %4 = bitcast <8 x i64> %__C to <32 x i16>
  %5 = bitcast <8 x i64> %__D to <32 x i16>
  %6 = icmp ne <32 x i16> %4, %5
  %7 = bitcast <32 x i1> %6 to i32
  %and.i = and i32 %7, 65535
  %shl.i = shl i32 %3, 16
  %or.i = or i32 %and.i, %shl.i
  %8 = bitcast <8 x i64> %__E to <32 x i16>
  %9 = bitcast <8 x i64> %__F to <32 x i16>
  %10 = icmp ne <32 x i16> %8, %9
  %11 = bitcast i32 %or.i to <32 x i1>
  %12 = and <32 x i1> %10, %11
  %13 = bitcast <32 x i1> %12 to i32
  ret i32 %13
}

define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext %__A) {
; X32-LABEL: test_mm512_mask_set1_epi8:
; X32: # %bb.0: # %entry
@ -189,46 +78,19 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext
; X32-NEXT: movb %ch, %al
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $55, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $9, %k0, %k1
; X32-NEXT: andb $2, %al
; X32-NEXT: shrb %al
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $54, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $10, %k0, %k1
; X32-NEXT: movb %ch, %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movl %eax, %edx
; X32-NEXT: shrb $2, %dl
; X32-NEXT: kmovd %edx, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $53, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $11, %k0, %k1
; X32-NEXT: kmovd %edx, %k3
; X32-NEXT: shrb $3, %al
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: shrl $12, %eax
; X32-NEXT: andl $15, %eax
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: kmovd %eax, %k4
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: shrl $13, %eax
; X32-NEXT: andb $1, %al
; X32-NEXT: kmovd %eax, %k3
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: shrl $14, %eax
; X32-NEXT: andl $3, %eax
; X32-NEXT: kmovd %eax, %k4
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: shrl $15, %eax
; X32-NEXT: andl $1, %eax
; X32-NEXT: kmovd %eax, %k5
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrl $16, %edx
@ -243,25 +105,52 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext
; X32-NEXT: kmovd %eax, %k7
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $55, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $9, %k0, %k1
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $54, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $10, %k0, %k1
; X32-NEXT: kxorq %k3, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $53, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $11, %k0, %k1
; X32-NEXT: kxorq %k4, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $52, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $12, %k0, %k1
; X32-NEXT: movl %ecx, %esi
; X32-NEXT: shrl $12, %esi
; X32-NEXT: andl $15, %esi
; X32-NEXT: kmovd %esi, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $51, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $13, %k0, %k1
; X32-NEXT: kxorq %k3, %k1, %k1
; X32-NEXT: kxorq %k5, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $50, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $14, %k0, %k1
; X32-NEXT: kxorq %k4, %k1, %k1
; X32-NEXT: movl %ecx, %esi
; X32-NEXT: shrl $14, %esi
; X32-NEXT: andl $3, %esi
; X32-NEXT: kmovd %esi, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $49, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $15, %k0, %k1
; X32-NEXT: kxorq %k5, %k1, %k1
; X32-NEXT: movl %ecx, %esi
; X32-NEXT: shrl $15, %esi
; X32-NEXT: andl $1, %esi
; X32-NEXT: kmovd %esi, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $48, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
@ -494,22 +383,14 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $43, %k0, %k1
; X32-NEXT: kxorq %k4, %k1, %k1
; X32-NEXT: movl %eax, %esi
; X32-NEXT: shrl $12, %esi
; X32-NEXT: andl $15, %esi
; X32-NEXT: kmovd %esi, %k2
; X32-NEXT: movl %eax, %esi
; X32-NEXT: shrl $14, %esi
; X32-NEXT: andl $3, %esi
; X32-NEXT: kmovd %esi, %k3
; X32-NEXT: movl %eax, %esi
; X32-NEXT: shrl $15, %esi
; X32-NEXT: andl $1, %esi
; X32-NEXT: kmovd %esi, %k4
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $20, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $44, %k0, %k1
; X32-NEXT: movl %eax, %esi
; X32-NEXT: shrl $12, %esi
; X32-NEXT: andl $15, %esi
; X32-NEXT: kmovd %esi, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $19, %k1, %k1
@ -520,12 +401,20 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext
; X32-NEXT: kshiftrq $18, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $46, %k0, %k1
; X32-NEXT: kxorq %k3, %k1, %k1
; X32-NEXT: movl %eax, %esi
; X32-NEXT: shrl $14, %esi
; X32-NEXT: andl $3, %esi
; X32-NEXT: kmovd %esi, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $17, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $47, %k0, %k1
; X32-NEXT: kxorq %k4, %k1, %k1
; X32-NEXT: movl %eax, %esi
; X32-NEXT: shrl $15, %esi
; X32-NEXT: andl $1, %esi
; X32-NEXT: kmovd %esi, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $16, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
@ -551,8 +440,8 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $12, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k4
; X32-NEXT: kshiftrq $52, %k4, %k0
; X32-NEXT: kxorq %k0, %k1, %k3
; X32-NEXT: kshiftrq $52, %k3, %k0
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $4, %dl
; X32-NEXT: kmovd %edx, %k1
@ -576,19 +465,19 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext
; X32-NEXT: andb $15, %cl
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $2, %dl
; X32-NEXT: kmovd %edx, %k3
; X32-NEXT: kmovd %edx, %k4
; X32-NEXT: kshiftlq $63, %k5, %k5
; X32-NEXT: kshiftrq $11, %k5, %k5
; X32-NEXT: kxorq %k4, %k5, %k4
; X32-NEXT: kshiftrq $53, %k4, %k5
; X32-NEXT: kxorq %k3, %k5, %k3
; X32-NEXT: kshiftrq $53, %k3, %k5
; X32-NEXT: kxorq %k6, %k5, %k5
; X32-NEXT: kshiftlq $63, %k5, %k5
; X32-NEXT: kshiftrq $10, %k5, %k5
; X32-NEXT: kxorq %k4, %k5, %k5
; X32-NEXT: kshiftrq $54, %k5, %k4
; X32-NEXT: kxorq %k7, %k4, %k6
; X32-NEXT: kxorq %k3, %k5, %k5
; X32-NEXT: kshiftrq $54, %k5, %k3
; X32-NEXT: kxorq %k7, %k3, %k6
; X32-NEXT: shrb $3, %cl
; X32-NEXT: kmovd %ecx, %k4
; X32-NEXT: kmovd %ecx, %k3
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $29, %ecx
; X32-NEXT: andb $1, %cl
@ -603,12 +492,6 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext
; X32-NEXT: kxorq %k5, %k0, %k0
; X32-NEXT: kshiftrq $56, %k0, %k5
; X32-NEXT: kxorq %k1, %k5, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $28, %ecx
; X32-NEXT: kmovd %ecx, %k5
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $30, %ecx
; X32-NEXT: kmovd %ecx, %k6
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $7, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
@ -618,17 +501,20 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext
; X32-NEXT: kshiftrq $6, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $58, %k0, %k1
; X32-NEXT: kxorq %k3, %k1, %k1
; X32-NEXT: kxorq %k4, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $5, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $59, %k0, %k1
; X32-NEXT: kxorq %k4, %k1, %k1
; X32-NEXT: kxorq %k3, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $4, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $60, %k0, %k1
; X32-NEXT: kxorq %k5, %k1, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $28, %ecx
; X32-NEXT: kmovd %ecx, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $3, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
@ -638,7 +524,10 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext
; X32-NEXT: kshiftrq $2, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $62, %k0, %k1
; X32-NEXT: kxorq %k6, %k1, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $30, %ecx
; X32-NEXT: kmovd %ecx, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: shrl $31, %eax
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
@ -743,46 +632,19 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X32-NEXT: movb %ch, %al
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $55, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $9, %k0, %k1
; X32-NEXT: andb $2, %al
; X32-NEXT: shrb %al
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $54, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $10, %k0, %k1
; X32-NEXT: movb %ch, %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movl %eax, %edx
; X32-NEXT: shrb $2, %dl
; X32-NEXT: kmovd %edx, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $53, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $11, %k0, %k1
; X32-NEXT: kmovd %edx, %k3
; X32-NEXT: shrb $3, %al
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: shrl $12, %eax
; X32-NEXT: andl $15, %eax
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: kmovd %eax, %k4
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: shrl $13, %eax
; X32-NEXT: andb $1, %al
; X32-NEXT: kmovd %eax, %k3
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: shrl $14, %eax
; X32-NEXT: andl $3, %eax
; X32-NEXT: kmovd %eax, %k4
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: shrl $15, %eax
; X32-NEXT: andl $1, %eax
; X32-NEXT: kmovd %eax, %k5
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrl $16, %edx
@ -797,25 +659,52 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X32-NEXT: kmovd %eax, %k7
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $55, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $9, %k0, %k1
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $54, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $10, %k0, %k1
; X32-NEXT: kxorq %k3, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $53, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $11, %k0, %k1
; X32-NEXT: kxorq %k4, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $52, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $12, %k0, %k1
; X32-NEXT: movl %ecx, %esi
; X32-NEXT: shrl $12, %esi
; X32-NEXT: andl $15, %esi
; X32-NEXT: kmovd %esi, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $51, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $13, %k0, %k1
; X32-NEXT: kxorq %k3, %k1, %k1
; X32-NEXT: kxorq %k5, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $50, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $14, %k0, %k1
; X32-NEXT: kxorq %k4, %k1, %k1
; X32-NEXT: movl %ecx, %esi
; X32-NEXT: shrl $14, %esi
; X32-NEXT: andl $3, %esi
; X32-NEXT: kmovd %esi, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $49, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $15, %k0, %k1
; X32-NEXT: kxorq %k5, %k1, %k1
; X32-NEXT: movl %ecx, %esi
; X32-NEXT: shrl $15, %esi
; X32-NEXT: andl $1, %esi
; X32-NEXT: kmovd %esi, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $48, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
@ -1048,22 +937,14 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $43, %k0, %k1
; X32-NEXT: kxorq %k4, %k1, %k1
; X32-NEXT: movl %eax, %esi
; X32-NEXT: shrl $12, %esi
; X32-NEXT: andl $15, %esi
; X32-NEXT: kmovd %esi, %k2
; X32-NEXT: movl %eax, %esi
; X32-NEXT: shrl $14, %esi
; X32-NEXT: andl $3, %esi
; X32-NEXT: kmovd %esi, %k3
; X32-NEXT: movl %eax, %esi
; X32-NEXT: shrl $15, %esi
; X32-NEXT: andl $1, %esi
; X32-NEXT: kmovd %esi, %k4
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $20, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $44, %k0, %k1
; X32-NEXT: movl %eax, %esi
; X32-NEXT: shrl $12, %esi
; X32-NEXT: andl $15, %esi
; X32-NEXT: kmovd %esi, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $19, %k1, %k1
@ -1074,12 +955,20 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X32-NEXT: kshiftrq $18, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $46, %k0, %k1
; X32-NEXT: kxorq %k3, %k1, %k1
; X32-NEXT: movl %eax, %esi
; X32-NEXT: shrl $14, %esi
; X32-NEXT: andl $3, %esi
; X32-NEXT: kmovd %esi, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $17, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $47, %k0, %k1
; X32-NEXT: kxorq %k4, %k1, %k1
; X32-NEXT: movl %eax, %esi
; X32-NEXT: shrl $15, %esi
; X32-NEXT: andl $1, %esi
; X32-NEXT: kmovd %esi, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $16, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
@ -1105,8 +994,8 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $12, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k4
; X32-NEXT: kshiftrq $52, %k4, %k0
; X32-NEXT: kxorq %k0, %k1, %k3
; X32-NEXT: kshiftrq $52, %k3, %k0
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $4, %dl
; X32-NEXT: kmovd %edx, %k1
@ -1130,19 +1019,19 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X32-NEXT: andb $15, %cl
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $2, %dl
; X32-NEXT: kmovd %edx, %k3
; X32-NEXT: kmovd %edx, %k4
; X32-NEXT: kshiftlq $63, %k5, %k5
; X32-NEXT: kshiftrq $11, %k5, %k5
; X32-NEXT: kxorq %k4, %k5, %k4
; X32-NEXT: kshiftrq $53, %k4, %k5
; X32-NEXT: kxorq %k3, %k5, %k3
; X32-NEXT: kshiftrq $53, %k3, %k5
; X32-NEXT: kxorq %k6, %k5, %k5
; X32-NEXT: kshiftlq $63, %k5, %k5
; X32-NEXT: kshiftrq $10, %k5, %k5
; X32-NEXT: kxorq %k4, %k5, %k5
; X32-NEXT: kshiftrq $54, %k5, %k4
; X32-NEXT: kxorq %k7, %k4, %k6
; X32-NEXT: kxorq %k3, %k5, %k5
; X32-NEXT: kshiftrq $54, %k5, %k3
; X32-NEXT: kxorq %k7, %k3, %k6
; X32-NEXT: shrb $3, %cl
; X32-NEXT: kmovd %ecx, %k4
; X32-NEXT: kmovd %ecx, %k3
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $29, %ecx
; X32-NEXT: andb $1, %cl
@ -1157,12 +1046,6 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X32-NEXT: kxorq %k5, %k0, %k0
; X32-NEXT: kshiftrq $56, %k0, %k5
; X32-NEXT: kxorq %k1, %k5, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $28, %ecx
; X32-NEXT: kmovd %ecx, %k5
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $30, %ecx
; X32-NEXT: kmovd %ecx, %k6
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $7, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
@ -1172,17 +1055,20 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X32-NEXT: kshiftrq $6, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $58, %k0, %k1
; X32-NEXT: kxorq %k3, %k1, %k1
; X32-NEXT: kxorq %k4, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $5, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $59, %k0, %k1
; X32-NEXT: kxorq %k4, %k1, %k1
; X32-NEXT: kxorq %k3, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $4, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $60, %k0, %k1
; X32-NEXT: kxorq %k5, %k1, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $28, %ecx
; X32-NEXT: kmovd %ecx, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $3, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
@ -1192,7 +1078,10 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) {
; X32-NEXT: kshiftrq $2, %k1, %k1
; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftrq $62, %k0, %k1
; X32-NEXT: kxorq %k6, %k1, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $30, %ecx
; X32-NEXT: kmovd %ecx, %k2
; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: shrl $31, %eax
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
@ -2,46 +2,6 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32

declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)

define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: movzwl %di, %eax
; AVX512BW-NEXT: shll $16, %esi
; AVX512BW-NEXT: orl %esi, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: shll $16, %eax
; AVX512F-32-NEXT: orl %ecx, %eax
; AVX512F-32-NEXT: retl
  %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
  ret i32 %res
}
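; kunpck.wd concatenates the low 16 bits of each operand, i.e.
; (%x1 << 16) | (%x0 & 0xffff), which is why it can lower to plain scalar
; integer instructions (movzwl/shll/orl) rather than a mask-register kunpckwd.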

declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)

define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: movl %edi, %eax
; AVX512BW-NEXT: shlq $32, %rsi
; AVX512BW-NEXT: orq %rsi, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: retl
  %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
  ret i64 %res
}

declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64)

define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) {
@ -1455,6 +1455,55 @@ define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8>
  ret <8 x i64> %res2
}

declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)

define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: kunpckwd %k1, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckwd %k0, %k1, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
  %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
  ret i32 %res
}

declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)

define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: kmovq %rdi, %k0
; AVX512BW-NEXT: kmovq %rsi, %k1
; AVX512BW-NEXT: kunpckdq %k1, %k0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
  %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
  ret i64 %res
}

declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>)

define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) {
@ -1,22 +1,23 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass x86-domain-reassignment -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq -o - %s | FileCheck %s
--- |
  ; ModuleID = '../test/CodeGen/X86/gpr-to-mask.ll'
  source_filename = "../test/CodeGen/X86/gpr-to-mask.ll"
  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-unknown-unknown"

  define void @test_fcmp_storefloat(i1 %cond, float* %fptr, float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) #0 {
  entry:
    br i1 %cond, label %if, label %else

  if: ; preds = %entry
    %cmp1 = fcmp oeq float %f3, %f4
    br label %exit

  else: ; preds = %entry
    %cmp2 = fcmp oeq float %f5, %f6
    br label %exit

  exit: ; preds = %else, %if
    %val = phi i1 [ %cmp1, %if ], [ %cmp2, %else ]
    %selected = select i1 %val, float %f1, float %f2
@ -48,14 +49,13 @@
...
---
name: test_fcmp_storefloat
# CHECK-LABEL: name: test_fcmp_storefloat
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: gr8, preferred-register: '' }
  - { id: 1, class: gr8, preferred-register: '' }
  - { id: 2, class: gr8, preferred-register: '' }
@ -79,7 +79,7 @@ registers:
  - { id: 20, class: fr128, preferred-register: '' }
  - { id: 21, class: fr128, preferred-register: '' }
  - { id: 22, class: fr32x, preferred-register: '' }
liveins:
  - { reg: '%edi', virtual-reg: '%3' }
  - { reg: '%rsi', virtual-reg: '%4' }
  - { reg: '%xmm0', virtual-reg: '%5' }
@ -88,7 +88,7 @@ liveins:
  - { reg: '%xmm3', virtual-reg: '%8' }
  - { reg: '%xmm4', virtual-reg: '%9' }
  - { reg: '%xmm5', virtual-reg: '%10' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
@ -105,14 +105,51 @@ frameInfo:
  hasMustTailInVarArgFunc: false
  savePoint: ''
  restorePoint: ''
fixedStack:
stack:
constants:
body: |
  ; CHECK-LABEL: name: test_fcmp_storefloat
  ; CHECK: bb.0.entry:
  ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK: liveins: %edi, %rsi, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5
  ; CHECK: [[COPY:%[0-9]+]]:fr32x = COPY %xmm5
  ; CHECK: [[COPY1:%[0-9]+]]:fr32x = COPY %xmm4
  ; CHECK: [[COPY2:%[0-9]+]]:fr32x = COPY %xmm3
  ; CHECK: [[COPY3:%[0-9]+]]:fr32x = COPY %xmm2
  ; CHECK: [[COPY4:%[0-9]+]]:fr32x = COPY %xmm1
  ; CHECK: [[COPY5:%[0-9]+]]:vr128x = COPY %xmm0
  ; CHECK: [[COPY6:%[0-9]+]]:gr64 = COPY %rsi
  ; CHECK: [[COPY7:%[0-9]+]]:gr32 = COPY %edi
  ; CHECK: [[COPY8:%[0-9]+]]:gr8 = COPY [[COPY7]].sub_8bit
  ; CHECK: TEST8ri killed [[COPY8]], 1, implicit-def %eflags
  ; CHECK: JE_1 %bb.2, implicit %eflags
  ; CHECK: JMP_1 %bb.1
  ; CHECK: bb.1.if:
  ; CHECK: successors: %bb.3(0x80000000)
  ; CHECK: [[VCMPSSZrr:%[0-9]+]]:vk1 = VCMPSSZrr [[COPY3]], [[COPY2]], 0
  ; CHECK: [[COPY9:%[0-9]+]]:vk32 = COPY [[VCMPSSZrr]]
  ; CHECK: [[COPY10:%[0-9]+]]:vk8 = COPY [[COPY9]]
  ; CHECK: JMP_1 %bb.3
  ; CHECK: bb.2.else:
  ; CHECK: successors: %bb.3(0x80000000)
  ; CHECK: [[VCMPSSZrr1:%[0-9]+]]:vk1 = VCMPSSZrr [[COPY1]], [[COPY]], 0
  ; CHECK: [[COPY11:%[0-9]+]]:vk32 = COPY [[VCMPSSZrr1]]
  ; CHECK: [[COPY12:%[0-9]+]]:vk8 = COPY [[COPY11]]
  ; CHECK: bb.3.exit:
  ; CHECK: [[PHI:%[0-9]+]]:vk8 = PHI [[COPY12]], %bb.2, [[COPY10]], %bb.1
  ; CHECK: [[COPY13:%[0-9]+]]:vk32 = COPY [[PHI]]
  ; CHECK: [[COPY14:%[0-9]+]]:vk1wm = COPY [[COPY13]]
  ; CHECK: [[COPY15:%[0-9]+]]:vr128x = COPY [[COPY4]]
  ; CHECK: [[DEF:%[0-9]+]]:fr128 = IMPLICIT_DEF
  ; CHECK: [[VMOVSSZrrk:%[0-9]+]]:fr128 = VMOVSSZrrk [[COPY15]], killed [[COPY14]], killed [[DEF]], [[COPY5]]
  ; CHECK: [[COPY16:%[0-9]+]]:fr32x = COPY [[VMOVSSZrrk]]
  ; CHECK: VMOVSSZmr [[COPY6]], 1, %noreg, 0, %noreg, killed [[COPY16]] :: (store 4 into %ir.fptr)
  ; CHECK: RET 0
  bb.0.entry:
    successors: %bb.1(0x40000000), %bb.2(0x40000000)
    liveins: %edi, %rsi, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5

    %10 = COPY %xmm5
    %9 = COPY %xmm4
    %8 = COPY %xmm3
@ -125,38 +162,31 @@ body: |
    TEST8ri killed %11, 1, implicit-def %eflags
    JE_1 %bb.2, implicit %eflags
    JMP_1 %bb.1

  bb.1.if:
    successors: %bb.3(0x80000000)

    %14 = VCMPSSZrr %7, %8, 0

    ; check that cross domain copies are replaced with same domain copies.
    ; CHECK: %15:vk32 = COPY %14
    ; CHECK: %0:vk8 = COPY %15

    %15 = COPY %14
    %0 = COPY %15.sub_8bit
    JMP_1 %bb.3

  bb.2.else:
    successors: %bb.3(0x80000000)
    %12 = VCMPSSZrr %9, %10, 0

    ; check that cross domain copies are replaced with same domain copies.
    ; CHECK: %13:vk32 = COPY %12
    ; CHECK: %1:vk8 = COPY %13

    %13 = COPY %12
    %1 = COPY %13.sub_8bit

  bb.3.exit:

    ; check PHI, IMPLICIT_DEF, and INSERT_SUBREG replacers.
    ; CHECK: %2:vk8 = PHI %1, %bb.2, %0, %bb.1
    ; CHECK: %16:vk32 = COPY %2
    ; CHECK: %18:vk1wm = COPY %16

    %2 = PHI %1, %bb.2, %0, %bb.1
    %17 = IMPLICIT_DEF
    %16 = INSERT_SUBREG %17, %2, 1
@ -171,14 +201,13 @@ body: |
...
---
name: test_8bitops
# CHECK-LABEL: name: test_8bitops
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: gr64, preferred-register: '' }
  - { id: 1, class: vr512, preferred-register: '' }
  - { id: 2, class: vr512, preferred-register: '' }
@ -198,13 +227,13 @@ registers:
  - { id: 16, class: gr8, preferred-register: '' }
  - { id: 17, class: gr8, preferred-register: '' }
  - { id: 18, class: gr8, preferred-register: '' }
liveins:
  - { reg: '%rdi', virtual-reg: '%0' }
  - { reg: '%zmm0', virtual-reg: '%1' }
  - { reg: '%zmm1', virtual-reg: '%2' }
  - { reg: '%zmm2', virtual-reg: '%3' }
  - { reg: '%zmm3', virtual-reg: '%4' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
@ -221,32 +250,50 @@ frameInfo:
  hasMustTailInVarArgFunc: false
  savePoint: ''
  restorePoint: ''
fixedStack:
stack:
constants:
body: |
  ; CHECK-LABEL: name: test_8bitops
  ; CHECK: bb.0:
  ; CHECK: successors: %bb.1(0x80000000)
  ; CHECK: liveins: %rdi, %zmm0, %zmm1, %zmm2, %zmm3
  ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY %rdi
  ; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY %zmm0
  ; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY %zmm1
  ; CHECK: [[COPY3:%[0-9]+]]:vr512 = COPY %zmm2
  ; CHECK: [[COPY4:%[0-9]+]]:vr512 = COPY %zmm3
  ; CHECK: [[VCMPPDZrri:%[0-9]+]]:vk8 = VCMPPDZrri [[COPY3]], [[COPY4]], 0
  ; CHECK: [[COPY5:%[0-9]+]]:vk32 = COPY [[VCMPPDZrri]]
  ; CHECK: [[COPY6:%[0-9]+]]:vk8 = COPY [[COPY5]]
  ; CHECK: [[KSHIFTRBri:%[0-9]+]]:vk8 = KSHIFTRBri [[COPY6]], 2
  ; CHECK: [[KSHIFTLBri:%[0-9]+]]:vk8 = KSHIFTLBri [[KSHIFTRBri]], 1
  ; CHECK: [[KNOTBrr:%[0-9]+]]:vk8 = KNOTBrr [[KSHIFTLBri]]
  ; CHECK: [[KORBrr:%[0-9]+]]:vk8 = KORBrr [[KNOTBrr]], [[KSHIFTRBri]]
  ; CHECK: [[KANDBrr:%[0-9]+]]:vk8 = KANDBrr [[KORBrr]], [[KSHIFTLBri]]
  ; CHECK: [[KXORBrr:%[0-9]+]]:vk8 = KXORBrr [[KANDBrr]], [[KSHIFTRBri]]
  ; CHECK: [[KADDBrr:%[0-9]+]]:vk8 = KADDBrr [[KXORBrr]], [[KNOTBrr]]
  ; CHECK: [[COPY7:%[0-9]+]]:vk32 = COPY [[KADDBrr]]
  ; CHECK: [[COPY8:%[0-9]+]]:vk8wm = COPY [[COPY7]]
  ; CHECK: [[VMOVAPDZrrk:%[0-9]+]]:vr512 = VMOVAPDZrrk [[COPY2]], killed [[COPY8]], [[COPY1]]
  ; CHECK: VMOVAPDZmr [[COPY]], 1, %noreg, 0, %noreg, killed [[VMOVAPDZrrk]]
  ; CHECK: bb.1:
  ; CHECK: successors: %bb.2(0x80000000)
  ; CHECK: bb.2:
  ; CHECK: RET 0
  bb.0:
    liveins: %rdi, %zmm0, %zmm1, %zmm2, %zmm3

    %0 = COPY %rdi
    %1 = COPY %zmm0
    %2 = COPY %zmm1
    %3 = COPY %zmm2
    %4 = COPY %zmm3

    %5 = VCMPPDZrri %3, %4, 0
    ; CHECK: %6:vk32 = COPY %5
    ; CHECK: %7:vk8 = COPY %6
    %6 = COPY %5
    %7 = COPY %6.sub_8bit

    ; CHECK: %12:vk8 = KSHIFTRBri %7, 2
    ; CHECK: %13:vk8 = KSHIFTLBri %12, 1
    ; CHECK: %14:vk8 = KNOTBrr %13
    ; CHECK: %15:vk8 = KORBrr %14, %12
    ; CHECK: %16:vk8 = KANDBrr %15, %13
    ; CHECK: %17:vk8 = KXORBrr %16, %12
    ; CHECK: %18:vk8 = KADDBrr %17, %14
    %12 = SHR8ri %7, 2, implicit-def dead %eflags
    %13 = SHL8ri %12, 1, implicit-def dead %eflags
    %14 = NOT8r %13
@ -254,19 +301,17 @@ body: |
|
    %16 = AND8rr %15, %13, implicit-def dead %eflags
    %17 = XOR8rr %16, %12, implicit-def dead %eflags
    %18 = ADD8rr %17, %14, implicit-def dead %eflags

    ; CHECK: %9:vk32 = COPY %18
    ; CHECK: %10:vk8wm = COPY %9

    %8 = IMPLICIT_DEF
    %9 = INSERT_SUBREG %8, %18, 1
    %10 = COPY %9
    %11 = VMOVAPDZrrk %2, killed %10, %1
    VMOVAPDZmr %0, 1, %noreg, 0, %noreg, killed %11

    ; CHECK: KTESTBrr %18, %18, implicit-def %eflags
    TEST8rr %18, %18, implicit-def %eflags
    JE_1 %bb.1, implicit %eflags
    JMP_1 %bb.2
    ; FIXME We can't replace TEST with KTEST due to flag differences
    ; TEST8rr %18, %18, implicit-def %eflags
    ; JE_1 %bb.1, implicit %eflags
    ; JMP_1 %bb.2
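    ; (TEST defines SF/ZF/PF from its result, while KTEST only defines ZF and
    ; CF, so a condition that reads any other flag would observe different
    ; values after the substitution.)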

  bb.1:
@ -276,14 +321,13 @@ body: |
...
---
name: test_16bitops
# CHECK-LABEL: name: test_16bitops
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: gr64, preferred-register: '' }
  - { id: 1, class: vr512, preferred-register: '' }
  - { id: 2, class: vr512, preferred-register: '' }
@ -302,13 +346,13 @@ registers:
  - { id: 15, class: gr16, preferred-register: '' }
  - { id: 16, class: gr16, preferred-register: '' }
  - { id: 17, class: gr16, preferred-register: '' }
liveins:
  - { reg: '%rdi', virtual-reg: '%0' }
  - { reg: '%zmm0', virtual-reg: '%1' }
  - { reg: '%zmm1', virtual-reg: '%2' }
  - { reg: '%zmm2', virtual-reg: '%3' }
  - { reg: '%zmm3', virtual-reg: '%4' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
@ -325,50 +369,66 @@ frameInfo:
  hasMustTailInVarArgFunc: false
  savePoint: ''
  restorePoint: ''
fixedStack:
stack:
constants:
body: |
  ; CHECK-LABEL: name: test_16bitops
  ; CHECK: bb.0:
  ; CHECK: successors: %bb.1(0x80000000)
  ; CHECK: liveins: %rdi, %zmm0, %zmm1, %zmm2, %zmm3
  ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY %rdi
  ; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY %zmm0
  ; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY %zmm1
  ; CHECK: [[COPY3:%[0-9]+]]:vr512 = COPY %zmm2
  ; CHECK: [[COPY4:%[0-9]+]]:vr512 = COPY %zmm3
  ; CHECK: [[VCMPPSZrri:%[0-9]+]]:vk16 = VCMPPSZrri [[COPY3]], [[COPY4]], 0
  ; CHECK: [[COPY5:%[0-9]+]]:vk32 = COPY [[VCMPPSZrri]]
  ; CHECK: [[COPY6:%[0-9]+]]:vk16 = COPY [[COPY5]]
  ; CHECK: [[KSHIFTRWri:%[0-9]+]]:vk16 = KSHIFTRWri [[COPY6]], 2
  ; CHECK: [[KSHIFTLWri:%[0-9]+]]:vk16 = KSHIFTLWri [[KSHIFTRWri]], 1
  ; CHECK: [[KNOTWrr:%[0-9]+]]:vk16 = KNOTWrr [[KSHIFTLWri]]
  ; CHECK: [[KORWrr:%[0-9]+]]:vk16 = KORWrr [[KNOTWrr]], [[KSHIFTRWri]]
  ; CHECK: [[KANDWrr:%[0-9]+]]:vk16 = KANDWrr [[KORWrr]], [[KSHIFTLWri]]
  ; CHECK: [[KXORWrr:%[0-9]+]]:vk16 = KXORWrr [[KANDWrr]], [[KSHIFTRWri]]
  ; CHECK: [[COPY7:%[0-9]+]]:vk32 = COPY [[KXORWrr]]
  ; CHECK: [[COPY8:%[0-9]+]]:vk16wm = COPY [[COPY7]]
  ; CHECK: [[VMOVAPSZrrk:%[0-9]+]]:vr512 = VMOVAPSZrrk [[COPY2]], killed [[COPY8]], [[COPY1]]
  ; CHECK: VMOVAPSZmr [[COPY]], 1, %noreg, 0, %noreg, killed [[VMOVAPSZrrk]]
  ; CHECK: bb.1:
  ; CHECK: successors: %bb.2(0x80000000)
  ; CHECK: bb.2:
  ; CHECK: RET 0
  bb.0:
    liveins: %rdi, %zmm0, %zmm1, %zmm2, %zmm3

    %0 = COPY %rdi
    %1 = COPY %zmm0
    %2 = COPY %zmm1
    %3 = COPY %zmm2
    %4 = COPY %zmm3

    %5 = VCMPPSZrri %3, %4, 0
    ; CHECK: %6:vk32 = COPY %5
    ; CHECK: %7:vk16 = COPY %6
    %6 = COPY %5
    %7 = COPY %6.sub_16bit

    ; CHECK: %12:vk16 = KSHIFTRWri %7, 2
    ; CHECK: %13:vk16 = KSHIFTLWri %12, 1
    ; CHECK: %14:vk16 = KNOTWrr %13
    ; CHECK: %15:vk16 = KORWrr %14, %12
    ; CHECK: %16:vk16 = KANDWrr %15, %13
    ; CHECK: %17:vk16 = KXORWrr %16, %12
    %12 = SHR16ri %7, 2, implicit-def dead %eflags
    %13 = SHL16ri %12, 1, implicit-def dead %eflags
    %14 = NOT16r %13
    %15 = OR16rr %14, %12, implicit-def dead %eflags
    %16 = AND16rr %15, %13, implicit-def dead %eflags
    %17 = XOR16rr %16, %12, implicit-def dead %eflags

    ; CHECK: %9:vk32 = COPY %17
    ; CHECK: %10:vk16wm = COPY %9

    %8 = IMPLICIT_DEF
    %9 = INSERT_SUBREG %8, %17, 3
    %10 = COPY %9
    %11 = VMOVAPSZrrk %2, killed %10, %1
    VMOVAPSZmr %0, 1, %noreg, 0, %noreg, killed %11

    ; CHECK: KTESTWrr %17, %17, implicit-def %eflags
    TEST16rr %17, %17, implicit-def %eflags
    JE_1 %bb.1, implicit %eflags
    JMP_1 %bb.2
    ; FIXME We can't replace TEST with KTEST due to flag differences
    ; TEST16rr %17, %17, implicit-def %eflags
    ; JE_1 %bb.1, implicit %eflags
    ; JMP_1 %bb.2

  bb.1:
@ -378,14 +438,13 @@ body: |
...
---
name: test_32bitops
# CHECK-LABEL: name: test_32bitops
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: gr64, preferred-register: '' }
  - { id: 1, class: vr512, preferred-register: '' }
  - { id: 2, class: vr512, preferred-register: '' }
@ -400,11 +459,11 @@ registers:
  - { id: 11, class: gr32, preferred-register: '' }
  - { id: 12, class: gr32, preferred-register: '' }
  - { id: 13, class: gr32, preferred-register: '' }
liveins:
  - { reg: '%rdi', virtual-reg: '%0' }
  - { reg: '%zmm0', virtual-reg: '%1' }
  - { reg: '%zmm1', virtual-reg: '%2' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
@ -421,26 +480,40 @@ frameInfo:
  hasMustTailInVarArgFunc: false
  savePoint: ''
  restorePoint: ''
fixedStack:
stack:
constants:
body: |
  ; CHECK-LABEL: name: test_32bitops
  ; CHECK: bb.0:
  ; CHECK: successors: %bb.1(0x80000000)
  ; CHECK: liveins: %rdi, %zmm0, %zmm1
  ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY %rdi
  ; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY %zmm0
  ; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY %zmm1
  ; CHECK: [[KMOVDkm:%[0-9]+]]:vk32 = KMOVDkm [[COPY]], 1, %noreg, 0, %noreg
  ; CHECK: [[KSHIFTRDri:%[0-9]+]]:vk32 = KSHIFTRDri [[KMOVDkm]], 2
  ; CHECK: [[KSHIFTLDri:%[0-9]+]]:vk32 = KSHIFTLDri [[KSHIFTRDri]], 1
  ; CHECK: [[KNOTDrr:%[0-9]+]]:vk32 = KNOTDrr [[KSHIFTLDri]]
  ; CHECK: [[KORDrr:%[0-9]+]]:vk32 = KORDrr [[KNOTDrr]], [[KSHIFTRDri]]
  ; CHECK: [[KANDDrr:%[0-9]+]]:vk32 = KANDDrr [[KORDrr]], [[KSHIFTLDri]]
  ; CHECK: [[KXORDrr:%[0-9]+]]:vk32 = KXORDrr [[KANDDrr]], [[KSHIFTRDri]]
  ; CHECK: [[KANDNDrr:%[0-9]+]]:vk32 = KANDNDrr [[KXORDrr]], [[KORDrr]]
  ; CHECK: [[KADDDrr:%[0-9]+]]:vk32 = KADDDrr [[KANDNDrr]], [[KXORDrr]]
  ; CHECK: [[COPY3:%[0-9]+]]:vk32wm = COPY [[KADDDrr]]
  ; CHECK: [[VMOVDQU16Zrrk:%[0-9]+]]:vr512 = VMOVDQU16Zrrk [[COPY2]], killed [[COPY3]], [[COPY1]]
  ; CHECK: VMOVDQA32Zmr [[COPY]], 1, %noreg, 0, %noreg, killed [[VMOVDQU16Zrrk]]
  ; CHECK: bb.1:
  ; CHECK: successors: %bb.2(0x80000000)
  ; CHECK: bb.2:
  ; CHECK: RET 0
  bb.0:
    liveins: %rdi, %zmm0, %zmm1

    %0 = COPY %rdi
    %1 = COPY %zmm0
    %2 = COPY %zmm1

    ; CHECK: %5:vk32 = KMOVDkm %0, 1, %noreg, 0, %noreg
    ; CHECK: %6:vk32 = KSHIFTRDri %5, 2
    ; CHECK: %7:vk32 = KSHIFTLDri %6, 1
    ; CHECK: %8:vk32 = KNOTDrr %7
    ; CHECK: %9:vk32 = KORDrr %8, %6
    ; CHECK: %10:vk32 = KANDDrr %9, %7
    ; CHECK: %11:vk32 = KXORDrr %10, %6
    ; CHECK: %12:vk32 = KANDNDrr %11, %9
    ; CHECK: %13:vk32 = KADDDrr %12, %11

    %5 = MOV32rm %0, 1, %noreg, 0, %noreg
    %6 = SHR32ri %5, 2, implicit-def dead %eflags
    %7 = SHL32ri %6, 1, implicit-def dead %eflags
@ -450,16 +523,15 @@ body: |
    %11 = XOR32rr %10, %6, implicit-def dead %eflags
    %12 = ANDN32rr %11, %9, implicit-def dead %eflags
    %13 = ADD32rr %12, %11, implicit-def dead %eflags

    ; CHECK: %3:vk32wm = COPY %13

    %3 = COPY %13
    %4 = VMOVDQU16Zrrk %2, killed %3, %1
    VMOVDQA32Zmr %0, 1, %noreg, 0, %noreg, killed %4

    ; CHECK: KTESTDrr %13, %13, implicit-def %eflags
    TEST32rr %13, %13, implicit-def %eflags
    JE_1 %bb.1, implicit %eflags
    JMP_1 %bb.2
    ; FIXME We can't replace TEST with KTEST due to flag differences
    ; TEST32rr %13, %13, implicit-def %eflags
    ; JE_1 %bb.1, implicit %eflags
    ; JMP_1 %bb.2

  bb.1:
@ -469,14 +541,13 @@ body: |
...
---
name: test_64bitops
# CHECK-LABEL: name: test_64bitops
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: gr64, preferred-register: '' }
  - { id: 1, class: vr512, preferred-register: '' }
  - { id: 2, class: vr512, preferred-register: '' }
@ -491,11 +562,11 @@ registers:
  - { id: 11, class: gr64, preferred-register: '' }
  - { id: 12, class: gr64, preferred-register: '' }
  - { id: 13, class: gr64, preferred-register: '' }
liveins:
  - { reg: '%rdi', virtual-reg: '%0' }
  - { reg: '%zmm0', virtual-reg: '%1' }
  - { reg: '%zmm1', virtual-reg: '%2' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
@ -512,26 +583,40 @@ frameInfo:
  hasMustTailInVarArgFunc: false
  savePoint: ''
  restorePoint: ''
fixedStack:
stack:
constants:
body: |
  ; CHECK-LABEL: name: test_64bitops
  ; CHECK: bb.0:
  ; CHECK: successors: %bb.1(0x80000000)
  ; CHECK: liveins: %rdi, %zmm0, %zmm1
  ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY %rdi
  ; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY %zmm0
  ; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY %zmm1
  ; CHECK: [[KMOVQkm:%[0-9]+]]:vk64 = KMOVQkm [[COPY]], 1, %noreg, 0, %noreg
  ; CHECK: [[KSHIFTRQri:%[0-9]+]]:vk64 = KSHIFTRQri [[KMOVQkm]], 2
  ; CHECK: [[KSHIFTLQri:%[0-9]+]]:vk64 = KSHIFTLQri [[KSHIFTRQri]], 1
  ; CHECK: [[KNOTQrr:%[0-9]+]]:vk64 = KNOTQrr [[KSHIFTLQri]]
  ; CHECK: [[KORQrr:%[0-9]+]]:vk64 = KORQrr [[KNOTQrr]], [[KSHIFTRQri]]
  ; CHECK: [[KANDQrr:%[0-9]+]]:vk64 = KANDQrr [[KORQrr]], [[KSHIFTLQri]]
  ; CHECK: [[KXORQrr:%[0-9]+]]:vk64 = KXORQrr [[KANDQrr]], [[KSHIFTRQri]]
  ; CHECK: [[KANDNQrr:%[0-9]+]]:vk64 = KANDNQrr [[KXORQrr]], [[KORQrr]]
  ; CHECK: [[KADDQrr:%[0-9]+]]:vk64 = KADDQrr [[KANDNQrr]], [[KXORQrr]]
  ; CHECK: [[COPY3:%[0-9]+]]:vk64wm = COPY [[KADDQrr]]
  ; CHECK: [[VMOVDQU8Zrrk:%[0-9]+]]:vr512 = VMOVDQU8Zrrk [[COPY2]], killed [[COPY3]], [[COPY1]]
  ; CHECK: VMOVDQA32Zmr [[COPY]], 1, %noreg, 0, %noreg, killed [[VMOVDQU8Zrrk]]
  ; CHECK: bb.1:
  ; CHECK: successors: %bb.2(0x80000000)
  ; CHECK: bb.2:
  ; CHECK: RET 0
  bb.0:
    liveins: %rdi, %zmm0, %zmm1

    %0 = COPY %rdi
    %1 = COPY %zmm0
    %2 = COPY %zmm1

    ; CHECK: %5:vk64 = KMOVQkm %0, 1, %noreg, 0, %noreg
    ; CHECK: %6:vk64 = KSHIFTRQri %5, 2
    ; CHECK: %7:vk64 = KSHIFTLQri %6, 1
    ; CHECK: %8:vk64 = KNOTQrr %7
    ; CHECK: %9:vk64 = KORQrr %8, %6
    ; CHECK: %10:vk64 = KANDQrr %9, %7
    ; CHECK: %11:vk64 = KXORQrr %10, %6
    ; CHECK: %12:vk64 = KANDNQrr %11, %9
    ; CHECK: %13:vk64 = KADDQrr %12, %11

    %5 = MOV64rm %0, 1, %noreg, 0, %noreg
    %6 = SHR64ri %5, 2, implicit-def dead %eflags
    %7 = SHL64ri %6, 1, implicit-def dead %eflags
@ -541,16 +626,15 @@ body: |
    %11 = XOR64rr %10, %6, implicit-def dead %eflags
    %12 = ANDN64rr %11, %9, implicit-def dead %eflags
    %13 = ADD64rr %12, %11, implicit-def dead %eflags

    ; CHECK: %3:vk64wm = COPY %13

    %3 = COPY %13
    %4 = VMOVDQU8Zrrk %2, killed %3, %1
    VMOVDQA32Zmr %0, 1, %noreg, 0, %noreg, killed %4

    ; CHECK: KTESTQrr %13, %13, implicit-def %eflags
    TEST64rr %13, %13, implicit-def %eflags
    JE_1 %bb.1, implicit %eflags
    JMP_1 %bb.2
    ; FIXME We can't replace TEST with KTEST due to flag differences
    ; TEST64rr %13, %13, implicit-def %eflags
    ; JE_1 %bb.1, implicit %eflags
    ; JMP_1 %bb.2

  bb.1:
@ -560,14 +644,13 @@ body: |
...
---
name: test_16bitext
# CHECK-LABEL: name: test_16bitext
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: gr64, preferred-register: '' }
  - { id: 1, class: vr512, preferred-register: '' }
  - { id: 2, class: vr512, preferred-register: '' }
@ -575,11 +658,11 @@ registers:
  - { id: 4, class: vr512, preferred-register: '' }
  - { id: 5, class: gr16, preferred-register: '' }
  - { id: 6, class: gr16, preferred-register: '' }
liveins:
  - { reg: '%rdi', virtual-reg: '%0' }
  - { reg: '%zmm0', virtual-reg: '%1' }
  - { reg: '%zmm1', virtual-reg: '%2' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
@ -596,24 +679,32 @@ frameInfo:
  hasMustTailInVarArgFunc: false
  savePoint: ''
  restorePoint: ''
fixedStack:
stack:
constants:
body: |
  bb.0:
    liveins: %rdi, %zmm0, %zmm1

    ; CHECK-LABEL: name: test_16bitext
    ; CHECK: liveins: %rdi, %zmm0, %zmm1
    ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY %rdi
    ; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY %zmm0
    ; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY %zmm1
    ; CHECK: [[KMOVBkm:%[0-9]+]]:vk8 = KMOVBkm [[COPY]], 1, %noreg, 0, %noreg
    ; CHECK: [[COPY3:%[0-9]+]]:vk16 = COPY [[KMOVBkm]]
    ; CHECK: [[KNOTWrr:%[0-9]+]]:vk16 = KNOTWrr [[COPY3]]
    ; CHECK: [[COPY4:%[0-9]+]]:vk16wm = COPY [[KNOTWrr]]
    ; CHECK: [[VMOVAPSZrrk:%[0-9]+]]:vr512 = VMOVAPSZrrk [[COPY2]], killed [[COPY4]], [[COPY1]]
    ; CHECK: VMOVAPSZmr [[COPY]], 1, %noreg, 0, %noreg, killed [[VMOVAPSZrrk]]
    ; CHECK: RET 0
    %0 = COPY %rdi
    %1 = COPY %zmm0
    %2 = COPY %zmm1

    ; CHECK: %7:vk8 = KMOVBkm %0, 1, %noreg, 0, %noreg
    ; CHECK: %5:vk16 = COPY %7
    ; CHECK: %6:vk16 = KNOTWrr %5

    %5 = MOVZX16rm8 %0, 1, %noreg, 0, %noreg
    %6 = NOT16r %5

    ; CHECK: %3:vk16wm = COPY %6
    %3 = COPY %6
    %4 = VMOVAPSZrrk %2, killed %3, %1
    VMOVAPSZmr %0, 1, %noreg, 0, %noreg, killed %4
@ -622,14 +713,13 @@ body: |
...
---
name: test_32bitext
# CHECK-LABEL: name: test_32bitext
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: gr64, preferred-register: '' }
  - { id: 1, class: vr512, preferred-register: '' }
  - { id: 2, class: vr512, preferred-register: '' }
@ -638,11 +728,11 @@ registers:
  - { id: 5, class: gr32, preferred-register: '' }
  - { id: 6, class: gr32, preferred-register: '' }
  - { id: 7, class: gr32, preferred-register: '' }
liveins:
  - { reg: '%rdi', virtual-reg: '%0' }
  - { reg: '%zmm0', virtual-reg: '%1' }
  - { reg: '%zmm1', virtual-reg: '%2' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
@ -659,27 +749,35 @@ frameInfo:
  hasMustTailInVarArgFunc: false
  savePoint: ''
  restorePoint: ''
fixedStack:
stack:
constants:
body: |
  bb.0:
    liveins: %rdi, %zmm0, %zmm1

    ; CHECK-LABEL: name: test_32bitext
    ; CHECK: liveins: %rdi, %zmm0, %zmm1
    ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY %rdi
    ; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY %zmm0
    ; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY %zmm1
    ; CHECK: [[KMOVBkm:%[0-9]+]]:vk8 = KMOVBkm [[COPY]], 1, %noreg, 0, %noreg
    ; CHECK: [[COPY3:%[0-9]+]]:vk32 = COPY [[KMOVBkm]]
    ; CHECK: [[KMOVWkm:%[0-9]+]]:vk16 = KMOVWkm [[COPY]], 1, %noreg, 0, %noreg
    ; CHECK: [[COPY4:%[0-9]+]]:vk32 = COPY [[KMOVWkm]]
    ; CHECK: [[KADDDrr:%[0-9]+]]:vk32 = KADDDrr [[COPY3]], [[COPY4]]
    ; CHECK: [[COPY5:%[0-9]+]]:vk64wm = COPY [[KADDDrr]]
    ; CHECK: [[VMOVDQU16Zrrk:%[0-9]+]]:vr512 = VMOVDQU16Zrrk [[COPY2]], killed [[COPY5]], [[COPY1]]
    ; CHECK: VMOVDQA32Zmr [[COPY]], 1, %noreg, 0, %noreg, killed [[VMOVDQU16Zrrk]]
    ; CHECK: RET 0
    %0 = COPY %rdi
    %1 = COPY %zmm0
    %2 = COPY %zmm1

    ; CHECK: %8:vk8 = KMOVBkm %0, 1, %noreg, 0, %noreg
    ; CHECK: %5:vk32 = COPY %8
    ; CHECK: %9:vk16 = KMOVWkm %0, 1, %noreg, 0, %noreg
    ; CHECK: %6:vk32 = COPY %9
    ; CHECK: %7:vk32 = KADDDrr %5, %6

    %5 = MOVZX32rm8 %0, 1, %noreg, 0, %noreg
    %6 = MOVZX32rm16 %0, 1, %noreg, 0, %noreg
    %7 = ADD32rr %5, %6, implicit-def dead %eflags

    ; CHECK: %3:vk64wm = COPY %7
    %3 = COPY %7
    %4 = VMOVDQU16Zrrk %2, killed %3, %1
    VMOVDQA32Zmr %0, 1, %noreg, 0, %noreg, killed %4
|
...
---
name: test_64bitext
# CHECK-LABEL: name: test_64bitext
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: gr64, preferred-register: '' }
  - { id: 1, class: vr512, preferred-register: '' }
  - { id: 2, class: vr512, preferred-register: '' }
@ -704,11 +801,11 @@ registers:
|
  - { id: 5, class: gr64, preferred-register: '' }
  - { id: 6, class: gr64, preferred-register: '' }
  - { id: 7, class: gr64, preferred-register: '' }
liveins:
  - { reg: '%rdi', virtual-reg: '%0' }
  - { reg: '%zmm0', virtual-reg: '%1' }
  - { reg: '%zmm1', virtual-reg: '%2' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
@ -725,27 +822,35 @@ frameInfo:
|
||||
hasMustTailInVarArgFunc: false
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack:
|
||||
stack:
|
||||
constants:
|
||||
fixedStack:
|
||||
stack:
|
||||
constants:
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: %rdi, %zmm0, %zmm1
|
||||
|
||||
|
||||
; CHECK-LABEL: name: test_64bitext
|
||||
; CHECK: liveins: %rdi, %zmm0, %zmm1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY %rdi
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY %zmm0
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vr512 = COPY %zmm1
|
||||
; CHECK: [[KMOVBkm:%[0-9]+]]:vk8 = KMOVBkm [[COPY]], 1, %noreg, 0, %noreg
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vk64 = COPY [[KMOVBkm]]
|
||||
; CHECK: [[KMOVWkm:%[0-9]+]]:vk16 = KMOVWkm [[COPY]], 1, %noreg, 0, %noreg
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vk64 = COPY [[KMOVWkm]]
|
||||
; CHECK: [[KADDQrr:%[0-9]+]]:vk64 = KADDQrr [[COPY3]], [[COPY4]]
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vk64wm = COPY [[KADDQrr]]
|
||||
; CHECK: [[VMOVDQU8Zrrk:%[0-9]+]]:vr512 = VMOVDQU8Zrrk [[COPY2]], killed [[COPY5]], [[COPY1]]
|
||||
; CHECK: VMOVDQA32Zmr [[COPY]], 1, %noreg, 0, %noreg, killed [[VMOVDQU8Zrrk]]
|
||||
; CHECK: RET 0
|
||||
%0 = COPY %rdi
|
||||
%1 = COPY %zmm0
|
||||
%2 = COPY %zmm1
|
||||
|
||||
; CHECK: %8:vk8 = KMOVBkm %0, 1, %noreg, 0, %noreg
|
||||
; CHECK: %5:vk64 = COPY %8
|
||||
; CHECK: %9:vk16 = KMOVWkm %0, 1, %noreg, 0, %noreg
|
||||
; CHECK: %6:vk64 = COPY %9
|
||||
; CHECK: %7:vk64 = KADDQrr %5, %6
|
||||
|
||||
%5 = MOVZX64rm8 %0, 1, %noreg, 0, %noreg
|
||||
%6 = MOVZX64rm16 %0, 1, %noreg, 0, %noreg
|
||||
%7 = ADD64rr %5, %6, implicit-def dead %eflags
|
||||
|
||||
; CHECK: %3:vk64wm = COPY %7
|
||||
%3 = COPY %7
|
||||
%4 = VMOVDQU8Zrrk %2, killed %3, %1
|
||||
VMOVDQA32Zmr %0, 1, %noreg, 0, %noreg, killed %4
|
||||
|
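# A rough gloss on the two tests above (a sketch added for clarity, assuming
# this file exercises the X86 domain-reassignment pass; the RUN line is not
# part of this hunk): the zero-extending scalar loads and the GPR add
# (MOVZX*rm8/MOVZX*rm16 feeding ADD32rr/ADD64rr) are rewritten into mask
# instructions (KMOVBkm/KMOVWkm feeding KADDDrr/KADDQrr), so the mask consumed
# by VMOVDQU16Zrrk/VMOVDQU8Zrrk never has to cross from a GPR into a
# k-register.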
14 test/CodeGen/X86/inline-asm-modifier-V.ll Normal file
@ -0,0 +1,14 @@
; RUN: llc < %s -mtriple=i686-- -no-integrated-as | FileCheck -check-prefix=X86 %s
; RUN: llc < %s -mtriple=x86_64-- -no-integrated-as | FileCheck -check-prefix=X64 %s

; If the target does not have 64-bit integer registers, emit 32-bit register
; names.

; X86: call __x86_indirect_thunk_e{{[abcd]}}x
; X64: call __x86_indirect_thunk_r

define void @q_modifier(i32* %p) {
entry:
  tail call void asm sideeffect "call __x86_indirect_thunk_${0:V}", "r,~{dirflag},~{fpsr},~{flags}"(i32* %p)
  ret void
}
22 test/CodeGen/X86/pr36199.ll Normal file
@ -0,0 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 | FileCheck %s

define void @foo() unnamed_addr #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: vaddps %zmm0, %zmm0, %zmm0
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
; CHECK-NEXT: vmovups %zmm0, (%rax)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %1 = fadd <16 x float> undef, undef
  %bc256 = bitcast <16 x float> %1 to <4 x i128>
  %2 = extractelement <4 x i128> %bc256, i32 0
  %3 = bitcast i128 %2 to <4 x float>
  %4 = shufflevector <4 x float> %3, <4 x float> undef, <16 x i32> <i32 0, i32
1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0,
i32 1, i32 2, i32 3>
  store <16 x float> %4, <16 x float>* undef, align 4
  ret void
}
@ -23,18 +23,18 @@ entry:
; X64: callq bar
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq %[[fp]], %r11
; X64: callq __llvm_external_retpoline_r11
; X64: callq __x86_indirect_thunk_r11
; X64: movl %[[x]], %edi
; X64: callq bar
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq %[[fp]], %r11
; X64: jmp __llvm_external_retpoline_r11 # TAILCALL
; X64: jmp __x86_indirect_thunk_r11 # TAILCALL

; X64FAST-LABEL: icall_reg:
; X64FAST: callq bar
; X64FAST: callq __llvm_external_retpoline_r11
; X64FAST: callq __x86_indirect_thunk_r11
; X64FAST: callq bar
; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL
; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL

; X86-LABEL: icall_reg:
; X86-DAG: movl 12(%esp), %[[fp:[^ ]*]]
@ -43,19 +43,19 @@ entry:
; X86: calll bar
; X86: movl %[[fp]], %eax
; X86: pushl %[[x]]
; X86: calll __llvm_external_retpoline_eax
; X86: calll __x86_indirect_thunk_eax
; X86: pushl %[[x]]
; X86: calll bar
; X86: movl %[[fp]], %eax
; X86: pushl %[[x]]
; X86: calll __llvm_external_retpoline_eax
; X86: calll __x86_indirect_thunk_eax
; X86-NOT: # TAILCALL

; X86FAST-LABEL: icall_reg:
; X86FAST: calll bar
; X86FAST: calll __llvm_external_retpoline_eax
; X86FAST: calll __x86_indirect_thunk_eax
; X86FAST: calll bar
; X86FAST: calll __llvm_external_retpoline_eax
; X86FAST: calll __x86_indirect_thunk_eax


@global_fp = external global void (i32)*
@ -72,28 +72,28 @@ define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
; X64-LABEL: icall_global_fp:
; X64-DAG: movl %edi, %[[x:[^ ]*]]
; X64-DAG: movq global_fp(%rip), %r11
; X64: callq __llvm_external_retpoline_r11
; X64: callq __x86_indirect_thunk_r11
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq global_fp(%rip), %r11
; X64: jmp __llvm_external_retpoline_r11 # TAILCALL
; X64: jmp __x86_indirect_thunk_r11 # TAILCALL

; X64FAST-LABEL: icall_global_fp:
; X64FAST: movq global_fp(%rip), %r11
; X64FAST: callq __llvm_external_retpoline_r11
; X64FAST: callq __x86_indirect_thunk_r11
; X64FAST: movq global_fp(%rip), %r11
; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL
; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL

; X86-LABEL: icall_global_fp:
; X86: movl global_fp, %eax
; X86: pushl 4(%esp)
; X86: calll __llvm_external_retpoline_eax
; X86: calll __x86_indirect_thunk_eax
; X86: addl $4, %esp
; X86: movl global_fp, %eax
; X86: jmp __llvm_external_retpoline_eax # TAILCALL
; X86: jmp __x86_indirect_thunk_eax # TAILCALL

; X86FAST-LABEL: icall_global_fp:
; X86FAST: calll __llvm_external_retpoline_eax
; X86FAST: jmp __llvm_external_retpoline_eax # TAILCALL
; X86FAST: calll __x86_indirect_thunk_eax
; X86FAST: jmp __x86_indirect_thunk_eax # TAILCALL


%struct.Foo = type { void (%struct.Foo*)** }
@ -114,14 +114,14 @@ define void @vcall(%struct.Foo* %obj) #0 {
; X64: movq (%[[obj]]), %[[vptr:[^ ]*]]
; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]]
; X64: movq %[[fp]], %r11
; X64: callq __llvm_external_retpoline_r11
; X64: callq __x86_indirect_thunk_r11
; X64-DAG: movq %[[obj]], %rdi
; X64-DAG: movq %[[fp]], %r11
; X64: jmp __llvm_external_retpoline_r11 # TAILCALL
; X64: jmp __x86_indirect_thunk_r11 # TAILCALL

; X64FAST-LABEL: vcall:
; X64FAST: callq __llvm_external_retpoline_r11
; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL
; X64FAST: callq __x86_indirect_thunk_r11
; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL

; X86-LABEL: vcall:
; X86: movl 8(%esp), %[[obj:[^ ]*]]
@ -129,14 +129,14 @@ define void @vcall(%struct.Foo* %obj) #0 {
; X86: movl 4(%[[vptr]]), %[[fp:[^ ]*]]
; X86: movl %[[fp]], %eax
; X86: pushl %[[obj]]
; X86: calll __llvm_external_retpoline_eax
; X86: calll __x86_indirect_thunk_eax
; X86: addl $4, %esp
; X86: movl %[[fp]], %eax
; X86: jmp __llvm_external_retpoline_eax # TAILCALL
; X86: jmp __x86_indirect_thunk_eax # TAILCALL

; X86FAST-LABEL: vcall:
; X86FAST: calll __llvm_external_retpoline_eax
; X86FAST: jmp __llvm_external_retpoline_eax # TAILCALL
; X86FAST: calll __x86_indirect_thunk_eax
; X86FAST: jmp __x86_indirect_thunk_eax # TAILCALL


declare void @direct_callee()
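; For context, a sketch of the thunk body these calls target (the usual
; retpoline shape; label names are illustrative and this test does not check
; the thunk itself):
;   __x86_indirect_thunk_r11:
;     callq .Lset_up_target
;   .Lcapture_spec:
;     pause
;     lfence
;     jmp .Lcapture_spec
;   .Lset_up_target:
;     movq %r11, (%rsp)
;     retq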
42 test/CodeGen/X86/retpoline-regparm.ll Normal file
@ -0,0 +1,42 @@
; RUN: llc -mtriple=i686-linux < %s | FileCheck --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" %s

; Test 32-bit retpoline when -mregparm=3 is used. This case is interesting
; because there are no available scratch registers. The Linux kernel builds
; with -mregparm=3, so we need to support it. TCO should fail because we need
; to restore EDI.

define void @call_edi(void (i32, i32, i32)* %fp) #0 {
entry:
  tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
  ret void
}

; CHECK-LABEL: call_edi:
; EDI is used, so it must be saved.
; CHECK: pushl %edi
; CHECK-DAG: xorl %eax, %eax
; CHECK-DAG: xorl %edx, %edx
; CHECK-DAG: xorl %ecx, %ecx
; CHECK-DAG: movl {{.*}}, %edi
; CHECK: calll __llvm_retpoline_edi
; CHECK: popl %edi
; CHECK: retl

define void @edi_external(void (i32, i32, i32)* %fp) #1 {
entry:
  tail call void %fp(i32 inreg 0, i32 inreg 0, i32 inreg 0)
  ret void
}

; CHECK-LABEL: edi_external:
; CHECK: pushl %edi
; CHECK-DAG: xorl %eax, %eax
; CHECK-DAG: xorl %edx, %edx
; CHECK-DAG: xorl %ecx, %ecx
; CHECK-DAG: movl {{.*}}, %edi
; CHECK: calll __x86_indirect_thunk_edi
; CHECK: popl %edi
; CHECK: retl

attributes #0 = { "target-features"="+retpoline" }
attributes #1 = { "target-features"="+retpoline-external-thunk" }
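; An assumed C equivalent of the scenario above (not part of the original
; file): under -mregparm=3 the three inreg i32 arguments occupy EAX, EDX and
; ECX, i.e. every caller-saved GPR, so the indirect-call target must go in a
; callee-saved register such as EDI. That is why EDI is pushed and popped
; around the thunk call and why the tail call cannot be kept.
;   void (*fp)(int, int, int);
;   fp(0, 0, 0);  /* args in EAX/EDX/ECX; fp moved into EDI */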
@ -340,10 +340,10 @@ latch:
; X86-NEXT: movl %edx, (%esp)
; X86-NEXT: retl
;
; X86-LABEL: .section .text.__llvm_retpoline_push,{{.*}},__llvm_retpoline_push,comdat
; X86-NEXT: .hidden __llvm_retpoline_push
; X86-NEXT: .weak __llvm_retpoline_push
; X86: __llvm_retpoline_push:
; X86-LABEL: .section .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat
; X86-NEXT: .hidden __llvm_retpoline_edi
; X86-NEXT: .weak __llvm_retpoline_edi
; X86: __llvm_retpoline_edi:
; X86-NEXT: # {{.*}} # %entry
; X86-NEXT: calll [[CALL_TARGET:.*]]
; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
@ -355,11 +355,7 @@ latch:
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: [[CALL_TARGET]]: # Block address taken
; X86-NEXT: # %entry
; X86-NEXT: addl $4, %esp
; X86-NEXT: pushl 4(%esp)
; X86-NEXT: pushl 4(%esp)
; X86-NEXT: popl 8(%esp)
; X86-NEXT: popl (%esp)
; X86-NEXT: movl %edi, (%esp)
; X86-NEXT: retl

88 test/DebugInfo/X86/void-typedef.ll Normal file
@ -0,0 +1,88 @@
; Choosing CodeView generates debug metadata for class-scope typedefs that
; Dwarf would normally omit. Choosing both CodeView and Dwarf triggered
; assertion failures and crashes because the Dwarf handler wasn't prepared for
; those records (in particular, ones with the void type represented by a
; null pointer).
;
; This test was generated with:
; clang++ -cc1 -emit-llvm -debug-info-kind=limited -dwarf-version=4 -gcodeview -x c++
; on the following source code:
;
; class A {
;   typedef void _Nodeptr;
; };
; class B {
;   A FailedTestsCache;
;   bool m_fn1();
; };
; bool B::m_fn1() {}
;
; CodeView generates a DIDerivedType for the _Nodeptr typedef.
;
; RUN: llc %s -o - 2>&1 | FileCheck %s
; CHECK-NOT: Assertion failed

; ModuleID = 'bug.cpp'
source_filename = "bug.cpp"
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i686-pc-windows-msvc"

%class.B = type { %class.A }
%class.A = type { i8 }

; Function Attrs: noinline nounwind optnone
define x86_thiscallcc zeroext i1 @"\01?m_fn1@B@@AAE_NXZ"(%class.B* %this) #0 align 2 !dbg !9 {
entry:
  %retval = alloca i1, align 1
  %this.addr = alloca %class.B*, align 4
  store %class.B* %this, %class.B** %this.addr, align 4
  call void @llvm.dbg.declare(metadata %class.B** %this.addr, metadata !22, metadata !DIExpression()), !dbg !24
  %this1 = load %class.B*, %class.B** %this.addr, align 4
  call void @llvm.trap(), !dbg !25
  unreachable, !dbg !25

return: ; No predecessors!
  %0 = load i1, i1* %retval, align 1, !dbg !25
  ret i1 %0, !dbg !25
}

; Function Attrs: nounwind readnone speculatable
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1

; Function Attrs: noreturn nounwind
declare void @llvm.trap() #2

attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { noreturn nounwind }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6, !7}
!llvm.ident = !{!8}

!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 6.0.0 ", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "<stdin>", directory: "D:\5Csrc\5Cbug", checksumkind: CSK_MD5, checksum: "2216f11c5ddda8c48a6f92a6079ad4b6")
!2 = !{}
!3 = !{i32 1, !"NumRegisterParameters", i32 0}
!4 = !{i32 2, !"Dwarf Version", i32 4}
!5 = !{i32 2, !"CodeView", i32 1}
!6 = !{i32 2, !"Debug Info Version", i32 3}
!7 = !{i32 1, !"wchar_size", i32 2}
!8 = !{!"clang version 6.0.0 "}
!9 = distinct !DISubprogram(name: "m_fn1", linkageName: "\01?m_fn1@B@@AAE_NXZ", scope: !11, file: !10, line: 8, type: !18, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, unit: !0, declaration: !17, variables: !2)
!10 = !DIFile(filename: "bug.cpp", directory: "D:\5Csrc\5Cbug", checksumkind: CSK_MD5, checksum: "2216f11c5ddda8c48a6f92a6079ad4b6")
!11 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "B", file: !10, line: 4, size: 8, elements: !12, identifier: ".?AVB@@")
!12 = !{!13, !17}
!13 = !DIDerivedType(tag: DW_TAG_member, name: "FailedTestsCache", scope: !11, file: !10, line: 5, baseType: !14, size: 8)
!14 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "A", file: !10, line: 1, size: 8, elements: !15, identifier: ".?AVA@@")
!15 = !{!16}
!16 = !DIDerivedType(tag: DW_TAG_typedef, name: "_Nodeptr", scope: !14, file: !10, line: 2, baseType: null)
!17 = !DISubprogram(name: "m_fn1", linkageName: "\01?m_fn1@B@@AAE_NXZ", scope: !11, file: !10, line: 6, type: !18, isLocal: false, isDefinition: false, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: false)
!18 = !DISubroutineType(cc: DW_CC_BORLAND_thiscall, types: !19)
!19 = !{!20, !21}
!20 = !DIBasicType(name: "bool", size: 8, encoding: DW_ATE_boolean)
!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 32, flags: DIFlagArtificial | DIFlagObjectPointer)
!22 = !DILocalVariable(name: "this", arg: 1, scope: !9, type: !23, flags: DIFlagArtificial | DIFlagObjectPointer)
!23 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 32)
!24 = !DILocation(line: 0, scope: !9)
!25 = !DILocation(line: 8, scope: !9)
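; The record that triggered the crashes described at the top of this file is
; !16 above: a class-scope typedef whose baseType is null, which is how the
; metadata encodes "void". CodeView keeps such typedefs, so with both
; emitters enabled the DWARF side must tolerate the null base type.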
8 test/MC/AsmParser/inline_macro_duplication.ll Normal file
@ -0,0 +1,8 @@
; RUN: not llc < %s 2>&1 | FileCheck %s

define void @test() {
  call void asm sideeffect ".macro FOO\0A.endm", "~{dirflag},~{fpsr},~{flags}"() #1
  call void asm sideeffect ".macro FOO\0A.endm", "~{dirflag},~{fpsr},~{flags}"() #1
; CHECK: error: macro 'FOO' is already defined
  ret void
}
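; Note: "\0A" is an embedded newline, so each asm blob expands to
; ".macro FOO" / ".endm". Both blobs feed one assembler session, so the
; second definition of FOO is a redefinition, producing the error checked
; above.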
@ -622,6 +622,11 @@ movl $12, foo(%rip)
// CHECK: encoding: [0xc7,0x05,A,A,A,A,0x0c,0x00,0x00,0x00]
// CHECK: fixup A - offset: 2, value: foo-8, kind: reloc_riprel_4byte

// rdar://37247000
movl $12, 1024(%rip)
// CHECK: movl $12, 1024(%rip)
// CHECK: encoding: [0xc7,0x05,0x00,0x04,0x00,0x00,0x0c,0x00,0x00,0x00]
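// A decode of the fixed-displacement encoding above, per the standard
// x86-64 ModRM rules: 0xc7 /0 is MOV r/m32, imm32; ModRM 0x05 selects
// RIP-relative addressing; 0x00,0x04,0x00,0x00 is disp32 1024 in little
// endian; 0x0c,0x00,0x00,0x00 is the immediate 12. A literal displacement
// needs no relocation, hence no "A" fixup bytes here.
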
movq $12, foo(%rip)
// CHECK: movq $12, foo(%rip)
// CHECK: encoding: [0x48,0xc7,0x05,A,A,A,A,0x0c,0x00,0x00,0x00]
@ -722,6 +722,114 @@ define <2 x half> @constant_rtz_pkrtz() {
  ret <2 x half> %cvt
}

; --------------------------------------------------------------------
; llvm.amdgcn.cvt.pknorm.i16
; --------------------------------------------------------------------

declare <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float, float) nounwind readnone

; CHECK-LABEL: @undef_lhs_cvt_pknorm_i16(
; CHECK: %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float %y)
define <2 x i16> @undef_lhs_cvt_pknorm_i16(float %y) {
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float %y)
  ret <2 x i16> %cvt
}

; CHECK-LABEL: @undef_rhs_cvt_pknorm_i16(
; CHECK: %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %x, float undef)
define <2 x i16> @undef_rhs_cvt_pknorm_i16(float %x) {
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %x, float undef)
  ret <2 x i16> %cvt
}

; CHECK-LABEL: @undef_cvt_pknorm_i16(
; CHECK: ret <2 x i16> undef
define <2 x i16> @undef_cvt_pknorm_i16() {
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float undef, float undef)
  ret <2 x i16> %cvt
}

; --------------------------------------------------------------------
; llvm.amdgcn.cvt.pknorm.u16
; --------------------------------------------------------------------

declare <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float, float) nounwind readnone

; CHECK-LABEL: @undef_lhs_cvt_pknorm_u16(
; CHECK: %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float %y)
define <2 x i16> @undef_lhs_cvt_pknorm_u16(float %y) {
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float %y)
  ret <2 x i16> %cvt
}

; CHECK-LABEL: @undef_rhs_cvt_pknorm_u16(
; CHECK: %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %x, float undef)
define <2 x i16> @undef_rhs_cvt_pknorm_u16(float %x) {
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %x, float undef)
  ret <2 x i16> %cvt
}

; CHECK-LABEL: @undef_cvt_pknorm_u16(
; CHECK: ret <2 x i16> undef
define <2 x i16> @undef_cvt_pknorm_u16() {
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float undef, float undef)
  ret <2 x i16> %cvt
}

; --------------------------------------------------------------------
; llvm.amdgcn.cvt.pk.i16
; --------------------------------------------------------------------

declare <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32, i32) nounwind readnone

; CHECK-LABEL: @undef_lhs_cvt_pk_i16(
; CHECK: %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 %y)
define <2 x i16> @undef_lhs_cvt_pk_i16(i32 %y) {
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 %y)
  ret <2 x i16> %cvt
}

; CHECK-LABEL: @undef_rhs_cvt_pk_i16(
; CHECK: %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %x, i32 undef)
define <2 x i16> @undef_rhs_cvt_pk_i16(i32 %x) {
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %x, i32 undef)
  ret <2 x i16> %cvt
}

; CHECK-LABEL: @undef_cvt_pk_i16(
; CHECK: ret <2 x i16> undef
define <2 x i16> @undef_cvt_pk_i16() {
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 undef, i32 undef)
  ret <2 x i16> %cvt
}

; --------------------------------------------------------------------
; llvm.amdgcn.cvt.pk.u16
; --------------------------------------------------------------------

declare <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32, i32) nounwind readnone

; CHECK-LABEL: @undef_lhs_cvt_pk_u16(
; CHECK: %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 %y)
define <2 x i16> @undef_lhs_cvt_pk_u16(i32 %y) {
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 %y)
  ret <2 x i16> %cvt
}

; CHECK-LABEL: @undef_rhs_cvt_pk_u16(
; CHECK: %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %x, i32 undef)
define <2 x i16> @undef_rhs_cvt_pk_u16(i32 %x) {
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %x, i32 undef)
  ret <2 x i16> %cvt
}

; CHECK-LABEL: @undef_cvt_pk_u16(
; CHECK: ret <2 x i16> undef
define <2 x i16> @undef_cvt_pk_u16() {
  %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 undef, i32 undef)
  ret <2 x i16> %cvt
}

; --------------------------------------------------------------------
; llvm.amdgcn.ubfe
; --------------------------------------------------------------------
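; The pattern throughout the four sections above: a pack intrinsic folds to
; undef only when both operands are undef; with a single undef operand the
; call is left in place, since the other operand still defines part of the
; packed result.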