Vendor import of llvm release_80 branch r351543:

https://llvm.org/svn/llvm-project/llvm/branches/release_80@351543
This commit is contained in:
Dimitry Andric 2019-01-19 18:44:22 +00:00
parent d8e91e4626
commit 3edec5c15a
51 changed files with 1455 additions and 307 deletions

View File

@ -21,7 +21,7 @@ if(NOT DEFINED LLVM_VERSION_PATCH)
set(LLVM_VERSION_PATCH 0)
endif()
if(NOT DEFINED LLVM_VERSION_SUFFIX)
set(LLVM_VERSION_SUFFIX svn)
set(LLVM_VERSION_SUFFIX "")
endif()
if (NOT PACKAGE_VERSION)

View File

@ -329,6 +329,7 @@ class MachineFunction {
bool CallsUnwindInit = false;
bool HasEHScopes = false;
bool HasEHFunclets = false;
bool HasLocalEscape = false;
/// List of C++ TypeInfo used.
std::vector<const GlobalValue *> TypeInfos;
@ -811,6 +812,9 @@ class MachineFunction {
bool hasEHFunclets() const { return HasEHFunclets; }
void setHasEHFunclets(bool V) { HasEHFunclets = V; }
bool hasLocalEscape() const { return HasLocalEscape; }
void setHasLocalEscape(bool V) { HasLocalEscape = V; }
/// Find or create an LandingPadInfo for the specified MachineBasicBlock.
LandingPadInfo &getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad);

View File

@ -392,6 +392,24 @@ class AMDGPULDSF32Intrin<string clang_builtin> :
[IntrArgMemOnly, NoCapture<0>]
>;
class AMDGPUDSOrderedIntrinsic : Intrinsic<
[llvm_i32_ty],
// M0 = {hi16:address, lo16:waveID}. Allow passing M0 as a pointer, so that
// the bit packing can be optimized at the IR level.
[LLVMQualPointerType<llvm_i32_ty, 2>, // IntToPtr(M0)
llvm_i32_ty, // value to add or swap
llvm_i32_ty, // ordering
llvm_i32_ty, // scope
llvm_i1_ty, // isVolatile
llvm_i32_ty, // ordered count index (OA index), also added to the address
llvm_i1_ty, // wave release, usually set to 1
llvm_i1_ty], // wave done, set to 1 for the last ordered instruction
[NoCapture<0>]
>;
def int_amdgcn_ds_ordered_add : AMDGPUDSOrderedIntrinsic;
def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
def int_amdgcn_ds_fadd : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_faddf">;
def int_amdgcn_ds_fmin : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fminf">;
def int_amdgcn_ds_fmax : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fmaxf">;

View File

@ -545,15 +545,17 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
OS.AddComment(Comment);
};
// Emit a label assignment with the SEH frame offset so we can use it for
// llvm.eh.recoverfp.
StringRef FLinkageName =
GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
MCSymbol *ParentFrameOffset =
Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
const MCExpr *MCOffset =
MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx);
Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
if (!isAArch64) {
// Emit a label assignment with the SEH frame offset so we can use it for
// llvm.eh.recoverfp.
StringRef FLinkageName =
GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
MCSymbol *ParentFrameOffset =
Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
const MCExpr *MCOffset =
MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx);
Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
}
// Use the assembler to compute the number of table entries through label
// difference and division.
@ -937,6 +939,9 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
if (FI != INT_MAX) {
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
unsigned UnusedReg;
// FIXME: getFrameIndexReference needs to match the behavior of
// AArch64RegisterInfo::hasBasePointer in which one of the scenarios where
// SP is used is if frame size >= 256.
Offset = TFI->getFrameIndexReference(*Asm->MF, FI, UnusedReg);
}

View File

@ -6182,6 +6182,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
.addFrameIndex(FI);
}
MF.setHasLocalEscape(true);
return nullptr;
}

View File

@ -453,6 +453,38 @@ static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin,
}
}
// Returns the epilog symbol of an epilog with the exact same unwind code
// sequence, if it exists. Otherwise, returns nulltpr.
// EpilogInstrs - Unwind codes for the current epilog.
// Epilogs - Epilogs that potentialy match the current epilog.
static MCSymbol*
FindMatchingEpilog(const std::vector<WinEH::Instruction>& EpilogInstrs,
const std::vector<MCSymbol *>& Epilogs,
const WinEH::FrameInfo *info) {
for (auto *EpilogStart : Epilogs) {
auto InstrsIter = info->EpilogMap.find(EpilogStart);
assert(InstrsIter != info->EpilogMap.end() &&
"Epilog not found in EpilogMap");
const auto &Instrs = InstrsIter->second;
if (Instrs.size() != EpilogInstrs.size())
continue;
bool Match = true;
for (unsigned i = 0; i < Instrs.size(); ++i)
if (Instrs[i].Operation != EpilogInstrs[i].Operation ||
Instrs[i].Offset != EpilogInstrs[i].Offset ||
Instrs[i].Register != EpilogInstrs[i].Register) {
Match = false;
break;
}
if (Match)
return EpilogStart;
}
return nullptr;
}
// Populate the .xdata section. The format of .xdata on ARM64 is documented at
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
@ -477,12 +509,28 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
// Process epilogs.
MapVector<MCSymbol *, uint32_t> EpilogInfo;
// Epilogs processed so far.
std::vector<MCSymbol *> AddedEpilogs;
for (auto &I : info->EpilogMap) {
MCSymbol *EpilogStart = I.first;
auto &EpilogInstrs = I.second;
uint32_t CodeBytes = ARM64CountOfUnwindCodes(EpilogInstrs);
EpilogInfo[EpilogStart] = TotalCodeBytes;
TotalCodeBytes += CodeBytes;
MCSymbol* MatchingEpilog =
FindMatchingEpilog(EpilogInstrs, AddedEpilogs, info);
if (MatchingEpilog) {
assert(EpilogInfo.find(MatchingEpilog) != EpilogInfo.end() &&
"Duplicate epilog not found");
EpilogInfo[EpilogStart] = EpilogInfo[MatchingEpilog];
// Clear the unwind codes in the EpilogMap, so that they don't get output
// in the logic below.
EpilogInstrs.clear();
} else {
EpilogInfo[EpilogStart] = TotalCodeBytes;
TotalCodeBytes += CodeBytes;
AddedEpilogs.push_back(EpilogStart);
}
}
// Code Words, Epilog count, E, X, Vers, Function Length

View File

@ -694,6 +694,34 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
switch (MI->getOpcode()) {
default:
break;
case AArch64::MOVMCSym: {
unsigned DestReg = MI->getOperand(0).getReg();
const MachineOperand &MO_Sym = MI->getOperand(1);
MachineOperand Hi_MOSym(MO_Sym), Lo_MOSym(MO_Sym);
MCOperand Hi_MCSym, Lo_MCSym;
Hi_MOSym.setTargetFlags(AArch64II::MO_G1 | AArch64II::MO_S);
Lo_MOSym.setTargetFlags(AArch64II::MO_G0 | AArch64II::MO_NC);
MCInstLowering.lowerOperand(Hi_MOSym, Hi_MCSym);
MCInstLowering.lowerOperand(Lo_MOSym, Lo_MCSym);
MCInst MovZ;
MovZ.setOpcode(AArch64::MOVZXi);
MovZ.addOperand(MCOperand::createReg(DestReg));
MovZ.addOperand(Hi_MCSym);
MovZ.addOperand(MCOperand::createImm(16));
EmitToStreamer(*OutStreamer, MovZ);
MCInst MovK;
MovK.setOpcode(AArch64::MOVKXi);
MovK.addOperand(MCOperand::createReg(DestReg));
MovK.addOperand(MCOperand::createReg(DestReg));
MovK.addOperand(Lo_MCSym);
MovK.addOperand(MCOperand::createImm(0));
EmitToStreamer(*OutStreamer, MovK);
return;
}
case AArch64::MOVIv2d_ns:
// If the target has <rdar://problem/16473581>, lower this
// instruction to movi.16b instead.

View File

@ -228,6 +228,10 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement)
return true;
// Win64 SEH requires frame pointer if funclets are present.
if (MF.hasLocalEscape())
return true;
return false;
}

View File

@ -2743,6 +2743,34 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::aarch64_neon_umin:
return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::localaddress: {
// Returns one of the stack, base, or frame pointer registers, depending on
// which is used to reference local variables.
MachineFunction &MF = DAG.getMachineFunction();
const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
unsigned Reg;
if (RegInfo->hasBasePointer(MF))
Reg = RegInfo->getBaseRegister();
else // This function handles the SP or FP case.
Reg = RegInfo->getFrameRegister(MF);
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
Op.getSimpleValueType());
}
case Intrinsic::eh_recoverfp: {
// FIXME: This needs to be implemented to correctly handle highly aligned
// stack objects. For now we simply return the incoming FP. Refer D53541
// for more details.
SDValue FnOp = Op.getOperand(1);
SDValue IncomingFPOp = Op.getOperand(2);
GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
if (!Fn)
report_fatal_error(
"llvm.eh.recoverfp must take a function as the first argument");
return IncomingFPOp;
}
}
}

View File

@ -133,6 +133,10 @@ def UseNegativeImmediates
: Predicate<"false">, AssemblerPredicate<"!FeatureNoNegativeImmediates",
"NegativeImmediates">;
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisInt<1>]>>;
//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
@ -6801,5 +6805,8 @@ def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
(TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"

View File

@ -466,6 +466,13 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Modify MI as necessary to handle as much of 'Offset' as possible
Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg);
if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) {
MachineOperand &FI = MI.getOperand(FIOperandNum);
FI.ChangeToImmediate(Offset);
return;
}
if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
return;

View File

@ -254,7 +254,7 @@ namespace AMDGPUAS {
FLAT_ADDRESS = 0, ///< Address space for flat memory.
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
REGION_ADDRESS = 2, ///< Address space for region memory.
REGION_ADDRESS = 2, ///< Address space for region memory. (GDS)
CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
LOCAL_ADDRESS = 3, ///< Address space for local memory.

View File

@ -4192,6 +4192,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16)
NODE_NAME_CASE(TBUFFER_LOAD_FORMAT)
NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16)
NODE_NAME_CASE(DS_ORDERED_COUNT)
NODE_NAME_CASE(ATOMIC_CMP_SWAP)
NODE_NAME_CASE(ATOMIC_INC)
NODE_NAME_CASE(ATOMIC_DEC)

View File

@ -474,6 +474,7 @@ enum NodeType : unsigned {
TBUFFER_STORE_FORMAT_D16,
TBUFFER_LOAD_FORMAT,
TBUFFER_LOAD_FORMAT_D16,
DS_ORDERED_COUNT,
ATOMIC_CMP_SWAP,
ATOMIC_INC,
ATOMIC_DEC,

View File

@ -72,6 +72,8 @@ def : SourceOfDivergence<int_amdgcn_buffer_atomic_xor>;
def : SourceOfDivergence<int_amdgcn_buffer_atomic_cmpswap>;
def : SourceOfDivergence<int_amdgcn_ps_live>;
def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
foreach intr = AMDGPUImageDimAtomicIntrinsics in
def : SourceOfDivergence<intr>;

View File

@ -308,6 +308,8 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
switch (Inst->getIntrinsicID()) {
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {

View File

@ -817,6 +817,11 @@ defm : DSAtomicRetPat_mc<DS_MAX_RTN_U64, i64, "atomic_load_umax_local">;
defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B64, i64, "atomic_cmp_swap_local">;
def : Pat <
(SIds_ordered_count i32:$value, i16:$offset),
(DS_ORDERED_COUNT $value, (as_i16imm $offset))
>;
//===----------------------------------------------------------------------===//
// Real instructions
//===----------------------------------------------------------------------===//

View File

@ -88,14 +88,28 @@ static bool isSMovRel(unsigned Opcode) {
}
}
static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) {
static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
const MachineInstr &MI) {
if (TII.isAlwaysGDS(MI.getOpcode()))
return true;
switch (MI.getOpcode()) {
case AMDGPU::S_SENDMSG:
case AMDGPU::S_SENDMSGHALT:
case AMDGPU::S_TTRACEDATA:
return true;
// These DS opcodes don't support GDS.
case AMDGPU::DS_NOP:
case AMDGPU::DS_PERMUTE_B32:
case AMDGPU::DS_BPERMUTE_B32:
return false;
default:
// TODO: GDS
if (TII.isDS(MI.getOpcode())) {
int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::gds);
if (MI.getOperand(GDS).getImm())
return true;
}
return false;
}
}
@ -145,7 +159,7 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
checkReadM0Hazards(MI) > 0)
return NoopHazard;
if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) &&
if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
checkReadM0Hazards(MI) > 0)
return NoopHazard;
@ -199,7 +213,7 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
isSMovRel(MI->getOpcode())))
return std::max(WaitStates, checkReadM0Hazards(MI));
if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI))
if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
return std::max(WaitStates, checkReadM0Hazards(MI));
return WaitStates;

View File

@ -910,6 +910,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
switch (IntrID) {
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
@ -937,6 +939,8 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
switch (II->getIntrinsicID()) {
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
@ -5438,6 +5442,63 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDLoc DL(Op);
switch (IntrID) {
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap: {
MemSDNode *M = cast<MemSDNode>(Op);
SDValue Chain = M->getOperand(0);
SDValue M0 = M->getOperand(2);
SDValue Value = M->getOperand(3);
unsigned OrderedCountIndex = M->getConstantOperandVal(7);
unsigned WaveRelease = M->getConstantOperandVal(8);
unsigned WaveDone = M->getConstantOperandVal(9);
unsigned ShaderType;
unsigned Instruction;
switch (IntrID) {
case Intrinsic::amdgcn_ds_ordered_add:
Instruction = 0;
break;
case Intrinsic::amdgcn_ds_ordered_swap:
Instruction = 1;
break;
}
if (WaveDone && !WaveRelease)
report_fatal_error("ds_ordered_count: wave_done requires wave_release");
switch (DAG.getMachineFunction().getFunction().getCallingConv()) {
case CallingConv::AMDGPU_CS:
case CallingConv::AMDGPU_KERNEL:
ShaderType = 0;
break;
case CallingConv::AMDGPU_PS:
ShaderType = 1;
break;
case CallingConv::AMDGPU_VS:
ShaderType = 2;
break;
case CallingConv::AMDGPU_GS:
ShaderType = 3;
break;
default:
report_fatal_error("ds_ordered_count unsupported for this calling conv");
}
unsigned Offset0 = OrderedCountIndex << 2;
unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
(Instruction << 4);
unsigned Offset = Offset0 | (Offset1 << 8);
SDValue Ops[] = {
Chain,
Value,
DAG.getTargetConstant(Offset, DL, MVT::i16),
copyToM0(DAG, Chain, DL, M0).getValue(1), // Glue
};
return DAG.getMemIntrinsicNode(AMDGPUISD::DS_ORDERED_COUNT, DL,
M->getVTList(), Ops, M->getMemoryVT(),
M->getMemOperand());
}
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_fadd:

View File

@ -536,10 +536,13 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
CurrScore);
}
if (Inst.mayStore()) {
setExpScore(
&Inst, TII, TRI, MRI,
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
CurrScore);
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
AMDGPU::OpName::data0) != -1) {
setExpScore(
&Inst, TII, TRI, MRI,
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
CurrScore);
}
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
AMDGPU::OpName::data1) != -1) {
setExpScore(&Inst, TII, TRI, MRI,
@ -1093,7 +1096,8 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
// bracket and the destination operand scores.
// TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere.
if (TII->isDS(Inst) && TII->usesLGKM_CNT(Inst)) {
if (TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {
if (TII->isAlwaysGDS(Inst.getOpcode()) ||
TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {
ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst);
ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst);
} else {

View File

@ -2390,6 +2390,16 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
changesVGPRIndexingMode(MI);
}
bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
return Opcode == AMDGPU::DS_ORDERED_COUNT ||
Opcode == AMDGPU::DS_GWS_INIT ||
Opcode == AMDGPU::DS_GWS_SEMA_V ||
Opcode == AMDGPU::DS_GWS_SEMA_BR ||
Opcode == AMDGPU::DS_GWS_SEMA_P ||
Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
Opcode == AMDGPU::DS_GWS_BARRIER;
}
bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
@ -2403,7 +2413,8 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
// EXEC = 0, but checking for that case here seems not worth it
// given the typical code patterns.
if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE)
Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE ||
Opcode == AMDGPU::DS_ORDERED_COUNT)
return true;
if (MI.isInlineAsm())

View File

@ -450,6 +450,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return get(Opcode).TSFlags & SIInstrFlags::DS;
}
bool isAlwaysGDS(uint16_t Opcode) const;
static bool isMIMG(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
}

View File

@ -45,6 +45,11 @@ def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
[SDNPMayLoad, SDNPMemOperand]
>;
def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
>;
def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;

View File

@ -17,6 +17,7 @@
#include "MSP430InstrInfo.h"
#include "MSP430MCInstLower.h"
#include "MSP430TargetMachine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@ -28,6 +29,7 @@
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/TargetRegistry.h"
@ -44,6 +46,8 @@ namespace {
StringRef getPassName() const override { return "MSP430 Assembly Printer"; }
bool runOnMachineFunction(MachineFunction &MF) override;
void printOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O, const char* Modifier = nullptr);
void printSrcMemOperand(const MachineInstr *MI, int OpNum,
@ -55,6 +59,8 @@ namespace {
unsigned OpNo, unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) override;
void EmitInstruction(const MachineInstr *MI) override;
void EmitInterruptVectorSection(MachineFunction &ISR);
};
} // end of anonymous namespace
@ -153,6 +159,32 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
}
void MSP430AsmPrinter::EmitInterruptVectorSection(MachineFunction &ISR) {
MCSection *Cur = OutStreamer->getCurrentSectionOnly();
const auto *F = &ISR.getFunction();
assert(F->hasFnAttribute("interrupt") &&
"Functions with MSP430_INTR CC should have 'interrupt' attribute");
StringRef IVIdx = F->getFnAttribute("interrupt").getValueAsString();
MCSection *IV = OutStreamer->getContext().getELFSection(
"__interrupt_vector_" + IVIdx,
ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_EXECINSTR);
OutStreamer->SwitchSection(IV);
const MCSymbol *FunctionSymbol = getSymbol(F);
OutStreamer->EmitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
OutStreamer->SwitchSection(Cur);
}
bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit separate section for an interrupt vector if ISR
if (MF.getFunction().getCallingConv() == CallingConv::MSP430_INTR)
EmitInterruptVectorSection(MF);
SetupMachineFunction(MF);
EmitFunctionBody();
return false;
}
// Force static initialization.
extern "C" void LLVMInitializeMSP430AsmPrinter() {
RegisterAsmPrinter<MSP430AsmPrinter> X(getTheMSP430Target());

View File

@ -27202,6 +27202,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VSHLI: return "X86ISD::VSHLI";
case X86ISD::VSRLI: return "X86ISD::VSRLI";
case X86ISD::VSRAI: return "X86ISD::VSRAI";
case X86ISD::VSHLV: return "X86ISD::VSHLV";
case X86ISD::VSRLV: return "X86ISD::VSRLV";
case X86ISD::VSRAV: return "X86ISD::VSRAV";
case X86ISD::VROTLI: return "X86ISD::VROTLI";
case X86ISD::VROTRI: return "X86ISD::VROTRI";

View File

@ -315,10 +315,8 @@ namespace llvm {
// Vector shift elements
VSHL, VSRL, VSRA,
// Vector variable shift right arithmetic.
// Unlike ISD::SRA, in case shift count greater then element size
// use sign bit to fill destination data element.
VSRAV,
// Vector variable shift
VSHLV, VSRLV, VSRAV,
// Vector shift elements by immediate
VSHLI, VSRLI, VSRAI,

View File

@ -6445,52 +6445,53 @@ defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoV
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
// Special handing for handling VPSRAV intrinsics.
multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
list<Predicate> p> {
multiclass avx512_var_shift_int_lowering<string InstrStr, SDNode OpNode,
X86VectorVTInfo _, list<Predicate> p> {
let Predicates = p in {
def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
def : Pat<(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
(!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
_.RC:$src2)>;
def : Pat<(_.VT (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2))),
def : Pat<(_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2))),
(!cast<Instruction>(InstrStr#_.ZSuffix##rm)
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
(OpNode _.RC:$src1, _.RC:$src2), _.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)),
(OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
(OpNode _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
_.RC:$src1, _.RC:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)),
(OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
_.RC:$src1, addr:$src2)>;
}
}
multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
list<Predicate> p> :
avx512_var_shift_int_lowering<InstrStr, _, p> {
multiclass avx512_var_shift_int_lowering_mb<string InstrStr, SDNode OpNode,
X86VectorVTInfo _,
list<Predicate> p> :
avx512_var_shift_int_lowering<InstrStr, OpNode, _, p> {
let Predicates = p in {
def : Pat<(_.VT (X86vsrav _.RC:$src1,
def : Pat<(_.VT (OpNode _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1,
(OpNode _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1,
(OpNode _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
@ -6498,15 +6499,47 @@ multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
}
}
defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
multiclass avx512_var_shift_int_lowering_vl<string InstrStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo,
Predicate p> {
defm : avx512_var_shift_int_lowering<InstrStr, OpNode, VTInfo.info512, [p]>;
defm : avx512_var_shift_int_lowering<InstrStr, OpNode, VTInfo.info256,
[HasVLX, p]>;
defm : avx512_var_shift_int_lowering<InstrStr, OpNode, VTInfo.info128,
[HasVLX, p]>;
}
multiclass avx512_var_shift_int_lowering_mb_vl<string InstrStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo,
Predicate p> {
defm : avx512_var_shift_int_lowering_mb<InstrStr, OpNode, VTInfo.info512, [p]>;
defm : avx512_var_shift_int_lowering_mb<InstrStr, OpNode, VTInfo.info256,
[HasVLX, p]>;
defm : avx512_var_shift_int_lowering_mb<InstrStr, OpNode, VTInfo.info128,
[HasVLX, p]>;
}
defm : avx512_var_shift_int_lowering_vl<"VPSRAVW", X86vsrav, avx512vl_i16_info,
HasBWI>;
defm : avx512_var_shift_int_lowering_mb_vl<"VPSRAVD", X86vsrav,
avx512vl_i32_info, HasAVX512>;
defm : avx512_var_shift_int_lowering_mb_vl<"VPSRAVQ", X86vsrav,
avx512vl_i64_info, HasAVX512>;
defm : avx512_var_shift_int_lowering_vl<"VPSRLVW", X86vsrlv, avx512vl_i16_info,
HasBWI>;
defm : avx512_var_shift_int_lowering_mb_vl<"VPSRLVD", X86vsrlv,
avx512vl_i32_info, HasAVX512>;
defm : avx512_var_shift_int_lowering_mb_vl<"VPSRLVQ", X86vsrlv,
avx512vl_i64_info, HasAVX512>;
defm : avx512_var_shift_int_lowering_vl<"VPSLLVW", X86vshlv, avx512vl_i16_info,
HasBWI>;
defm : avx512_var_shift_int_lowering_mb_vl<"VPSLLVD", X86vshlv,
avx512vl_i32_info, HasAVX512>;
defm : avx512_var_shift_int_lowering_mb_vl<"VPSLLVQ", X86vshlv,
avx512vl_i64_info, HasAVX512>;
// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {

View File

@ -198,6 +198,8 @@ def X86vsra : SDNode<"X86ISD::VSRA", X86vshiftuniform>;
def X86vshiftvariable : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<0>]>;
def X86vshlv : SDNode<"X86ISD::VSHLV", X86vshiftvariable>;
def X86vsrlv : SDNode<"X86ISD::VSRLV", X86vshiftvariable>;
def X86vsrav : SDNode<"X86ISD::VSRAV", X86vshiftvariable>;
def X86vshli : SDNode<"X86ISD::VSHLI", X86vshiftimm>;

View File

@ -8318,7 +8318,7 @@ def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src))),
// Variable Bit Shifts
//
multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType vt128, ValueType vt256> {
SDNode IntrinNode, ValueType vt128, ValueType vt256> {
def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@ -8347,23 +8347,23 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
(vt256 (load addr:$src2)))))]>,
VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
SchedWriteVarVecShift.YMM.ReadAfterFold]>;
def : Pat<(vt128 (IntrinNode VR128:$src1, VR128:$src2)),
(!cast<Instruction>(NAME#"rr") VR128:$src1, VR128:$src2)>;
def : Pat<(vt128 (IntrinNode VR128:$src1, (load addr:$src2))),
(!cast<Instruction>(NAME#"rm") VR128:$src1, addr:$src2)>;
def : Pat<(vt256 (IntrinNode VR256:$src1, VR256:$src2)),
(!cast<Instruction>(NAME#"Yrr") VR256:$src1, VR256:$src2)>;
def : Pat<(vt256 (IntrinNode VR256:$src1, (load addr:$src2))),
(!cast<Instruction>(NAME#"Yrm") VR256:$src1, addr:$src2)>;
}
let Predicates = [HasAVX2, NoVLX] in {
defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
def : Pat<(v4i32 (X86vsrav VR128:$src1, VR128:$src2)),
(VPSRAVDrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86vsrav VR128:$src1, (load addr:$src2))),
(VPSRAVDrm VR128:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86vsrav VR256:$src1, VR256:$src2)),
(VPSRAVDYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v8i32 (X86vsrav VR256:$src1, (load addr:$src2))),
(VPSRAVDYrm VR256:$src1, addr:$src2)>;
defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, X86vshlv, v4i32, v8i32>;
defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, X86vshlv, v2i64, v4i64>, VEX_W;
defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, X86vsrlv, v4i32, v8i32>;
defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, X86vsrlv, v2i64, v4i64>, VEX_W;
defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, X86vsrav, v4i32, v8i32>;
}
//===----------------------------------------------------------------------===//

View File

@ -389,10 +389,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
X86_INTRINSIC_DATA(avx2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0),
@ -405,10 +405,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
@ -943,11 +943,11 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_pslli_d_512, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_pslli_q_512, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_pslli_w_512, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
X86_INTRINSIC_DATA(avx512_psra_d_512, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx512_psra_q_128, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx512_psra_q_256, INTR_TYPE_2OP, X86ISD::VSRA, 0),
@ -971,11 +971,11 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_psrli_d_512, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_psrli_q_512, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_psrli_w_512, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),

View File

@ -3065,9 +3065,11 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
I->isTerminator())
return false;
// Do not sink alloca instructions out of the entry block.
if (isa<AllocaInst>(I) && I->getParent() ==
&DestBlock->getParent()->getEntryBlock())
// Do not sink static or dynamic alloca instructions. Static allocas must
// remain in the entry block, and dynamic allocas must not be sunk in between
// a stacksave / stackrestore pair, which would incorrectly shorten its
// lifetime.
if (isa<AllocaInst>(I))
return false;
// Do not sink into catchswitch blocks.

View File

@ -3031,7 +3031,10 @@ class llvm::sroa::AllocaSliceRewriter
ConstantInt *Size =
ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
NewEndOffset - NewBeginOffset);
Value *Ptr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
// Lifetime intrinsics always expect an i8* so directly get such a pointer
// for the new alloca slice.
Type *PointerTy = IRB.getInt8PtrTy(OldPtr->getType()->getPointerAddressSpace());
Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
Value *New;
if (II.getIntrinsicID() == Intrinsic::lifetime_start)
New = IRB.CreateLifetimeStart(Ptr, Size);

View File

@ -1468,8 +1468,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// If any of the scalars is marked as a value that needs to stay scalar, then
// we need to gather the scalars.
// The reduction nodes (stored in UserIgnoreList) also should stay scalar.
for (unsigned i = 0, e = VL.size(); i != e; ++i) {
if (MustGather.count(VL[i])) {
if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
newTreeEntry(VL, false, UserTreeIdx);
return;

View File

@ -0,0 +1,67 @@
; RUN: llc -mtriple arm64-windows -o - %s | FileCheck %s
; Function Attrs: noinline optnone uwtable
define dso_local i32 @foo() {
entry:
; CHECK-LABEL: foo
; CHECK: orr w8, wzr, #0x1
; CHECK: mov w0, wzr
; CHECK: mov x1, x29
; CHECK: .set .Lfoo$frame_escape_0, -4
; CHECK: stur w8, [x29, #-4]
; CHECK: bl "?fin$0@0@foo@@"
; CHECK: ldur w0, [x29, #-4]
%count = alloca i32, align 4
call void (...) @llvm.localescape(i32* %count)
store i32 0, i32* %count, align 4
%0 = load i32, i32* %count, align 4
%add = add nsw i32 %0, 1
store i32 %add, i32* %count, align 4
%1 = call i8* @llvm.localaddress()
call void @"?fin$0@0@foo@@"(i8 0, i8* %1)
%2 = load i32, i32* %count, align 4
ret i32 %2
}
define internal void @"?fin$0@0@foo@@"(i8 %abnormal_termination, i8* %frame_pointer) {
entry:
; CHECK-LABEL: @"?fin$0@0@foo@@"
; CHECK: sub sp, sp, #16
; CHECK: str x1, [sp, #8]
; CHECK: strb w0, [sp, #7]
; CHECK: movz x8, #:abs_g1_s:.Lfoo$frame_escape_0
; CHECK: movk x8, #:abs_g0_nc:.Lfoo$frame_escape_0
; CHECK: add x8, x1, x8
; CHECK: ldr w9, [x8]
; CHECK: add w9, w9, #1
; CHECK: str w9, [x8]
%frame_pointer.addr = alloca i8*, align 8
%abnormal_termination.addr = alloca i8, align 1
%0 = call i8* @llvm.localrecover(i8* bitcast (i32 ()* @foo to i8*), i8* %frame_pointer, i32 0)
%count = bitcast i8* %0 to i32*
store i8* %frame_pointer, i8** %frame_pointer.addr, align 8
store i8 %abnormal_termination, i8* %abnormal_termination.addr, align 1
%1 = zext i8 %abnormal_termination to i32
%cmp = icmp eq i32 %1, 0
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
%2 = load i32, i32* %count, align 4
%add = add nsw i32 %2, 1
store i32 %add, i32* %count, align 4
br label %if.end
if.end: ; preds = %if.then, %entry
ret void
}
; Function Attrs: nounwind readnone
declare i8* @llvm.localrecover(i8*, i8*, i32)
; Function Attrs: nounwind readnone
declare i8* @llvm.localaddress()
; Function Attrs: nounwind
declare void @llvm.localescape(...)

View File

@ -0,0 +1,30 @@
; RUN: llc -mtriple arm64-windows %s -o - | FileCheck %s
; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @foo() {
entry:
; CHECK-LABEL: foo
; CHECK: .set .Lfoo$frame_escape_0, -4
%count = alloca i32, align 4
call void (...) @llvm.localescape(i32* %count)
ret i32 0
}
define internal i32 @"?filt$0@0@foo@@"(i8* %exception_pointers, i8* %frame_pointer) {
entry:
; CHECK-LABEL: @"?filt$0@0@foo@@"
; CHECK: movz x8, #:abs_g1_s:.Lfoo$frame_escape_0
; CHECK: movk x8, #:abs_g0_nc:.Lfoo$frame_escape_0
%0 = call i8* @llvm.localrecover(i8* bitcast (i32 ()* @foo to i8*), i8* %frame_pointer, i32 0)
%count = bitcast i8* %0 to i32*
%1 = load i32, i32* %count, align 4
ret i32 %1
}
; Function Attrs: nounwind readnone
declare i8* @llvm.localrecover(i8*, i8*, i32) #2
; Function Attrs: nounwind
declare void @llvm.localescape(...) #3

View File

@ -1,7 +1,7 @@
# RUN: llc -o - %s -mtriple=aarch64-windows -start-after=prologepilog \
# RUN: -disable-branch-fold -filetype=obj \
# RUN: | llvm-readobj -unwind | FileCheck %s
# Check that multiple epilgoues are correctly placed in .xdata.
# Check that identical multiple epilgoues are correctly shared in .xdata.
# CHECK: ExceptionData {
# CHECK-NEXT: FunctionLength: 164
@ -9,7 +9,7 @@
# CHECK-NEXT: ExceptionData: No
# CHECK-NEXT: EpiloguePacked: No
# CHECK-NEXT: EpilogueScopes: 2
# CHECK-NEXT: ByteCodeLength: 48
# CHECK-NEXT: ByteCodeLength: 32
# CHECK-NEXT: Prologue [
# CHECK-NEXT: 0xc80c ; stp x19, x20, [sp, #96]
# CHECK-NEXT: 0xc88a ; stp x21, x22, [sp, #80]
@ -37,7 +37,7 @@
# CHECK-NEXT: }
# CHECK-NEXT: EpilogueScope {
# CHECK-NEXT: StartOffset: 33
# CHECK-NEXT: EpilogueStartIndex: 30
# CHECK-NEXT: EpilogueStartIndex: 15
# CHECK-NEXT: Opcodes [
# CHECK-NEXT: 0xc80c ; ldp x19, x20, [sp, #96]
# CHECK-NEXT: 0xc88a ; ldp x21, x22, [sp, #80]

View File

@ -0,0 +1,225 @@
# RUN: llc -o - %s -mtriple=aarch64-windows -start-after=prologepilog \
# RUN: -disable-branch-fold -filetype=obj \
# RUN: | llvm-readobj -unwind | FileCheck %s
# Check that non-identical multiple epilgoues are correctly shared in .xdata.
# CHECK: ExceptionData {
# CHECK-NEXT: FunctionLength: 160
# CHECK-NEXT: Version: 0
# CHECK-NEXT: ExceptionData: No
# CHECK-NEXT: EpiloguePacked: No
# CHECK-NEXT: EpilogueScopes: 2
# CHECK-NEXT: ByteCodeLength: 44
# CHECK-NEXT: Prologue [
# CHECK-NEXT: 0xc80c ; stp x19, x20, [sp, #96]
# CHECK-NEXT: 0xc88a ; stp x21, x22, [sp, #80]
# CHECK-NEXT: 0xc908 ; stp x23, x24, [sp, #64]
# CHECK-NEXT: 0xc986 ; stp x25, x26, [sp, #48]
# CHECK-NEXT: 0xca04 ; stp x27, x28, [sp, #32]
# CHECK-NEXT: 0xd802 ; stp d8, d9, [sp, #16]
# CHECK-NEXT: 0xda8d ; stp d10, d11, [sp, #-112]!
# CHECK-NEXT: 0xe4 ; end
# CHECK-NEXT: ]
# CHECK-NEXT: EpilogueScopes [
# CHECK-NEXT: EpilogueScope {
# CHECK-NEXT: StartOffset: 16
# CHECK-NEXT: EpilogueStartIndex: 15
# CHECK-NEXT: Opcodes [
# CHECK-NEXT: 0xc80c ; ldp x19, x20, [sp, #96]
# CHECK-NEXT: 0xc88a ; ldp x21, x22, [sp, #80]
# CHECK-NEXT: 0xc908 ; ldp x23, x24, [sp, #64]
# CHECK-NEXT: 0xc986 ; ldp x25, x26, [sp, #48]
# CHECK-NEXT: 0xd802 ; ldp d8, d9, [sp, #16]
# CHECK-NEXT: 0xda8d ; ldp d10, d11, [sp], #112
# CHECK-NEXT: 0xe4 ; end
# CHECK-NEXT: ]
# CHECK-NEXT: }
# CHECK-NEXT: EpilogueScope {
# CHECK-NEXT: StartOffset: 32
# CHECK-NEXT: EpilogueStartIndex: 28
# CHECK-NEXT: Opcodes [
# CHECK-NEXT: 0xc80c ; ldp x19, x20, [sp, #96]
# CHECK-NEXT: 0xc88a ; ldp x21, x22, [sp, #80]
# CHECK-NEXT: 0xc908 ; ldp x23, x24, [sp, #64]
# CHECK-NEXT: 0xc986 ; ldp x25, x26, [sp, #48]
# CHECK-NEXT: 0xca04 ; ldp x27, x28, [sp, #32]
# CHECK-NEXT: 0xd802 ; ldp d8, d9, [sp, #16]
# CHECK-NEXT: 0xda8d ; ldp d10, d11, [sp], #112
# CHECK-NEXT: 0xe4 ; end
# CHECK-NEXT: ]
# CHECK-NEXT: }
# CHECK-NEXT: ]
# CHECK-NEXT: }
...
---
name: test
alignment: 2
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: true
registers:
liveins:
- { reg: '$w0', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 112
offsetAdjustment: 0
maxAlignment: 8
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
hasOpaqueSPAdjustment: true
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack:
stack:
- { id: 0, name: '', type: spill-slot, offset: -8, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '$x19', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 1, name: '', type: spill-slot, offset: -16, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '$x20', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 2, name: '', type: spill-slot, offset: -24, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '$x21', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 3, name: '', type: spill-slot, offset: -32, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '$x22', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 4, name: '', type: spill-slot, offset: -40, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '$x23', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 5, name: '', type: spill-slot, offset: -48, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '$x24', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 6, name: '', type: spill-slot, offset: -56, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '$x25', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 7, name: '', type: spill-slot, offset: -64, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '$x26', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 8, name: '', type: spill-slot, offset: -72, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '$x27', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 9, name: '', type: spill-slot, offset: -80, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '$x28', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 10, name: '', type: spill-slot, offset: -88, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '$d8', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 11, name: '', type: spill-slot, offset: -96, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '$d9', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 12, name: '', type: spill-slot, offset: -104, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '$d10', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 13, name: '', type: spill-slot, offset: -112, size: 8, alignment: 8,
stack-id: 0, callee-saved-register: '$d11', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
constants:
body: |
bb.0.entry:
successors: %bb.2(0x40000000), %bb.1(0x40000000)
liveins: $x0, $x1, $d0, $d1, $d10, $d11, $d8, $d9, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20
early-clobber $sp = frame-setup STPDpre killed $d10, killed $d11, $sp, -14 :: (store 8 into %stack.12), (store 8 into %stack.13)
frame-setup SEH_SaveFRegP_X 10, 11, -112
frame-setup STPDi killed $d8, killed $d9, $sp, 2 :: (store 8 into %stack.10), (store 8 into %stack.11)
frame-setup SEH_SaveFRegP 8, 9, 16
frame-setup STPXi killed $x27, killed $x28, $sp, 4 :: (store 8 into %stack.8), (store 8 into %stack.9)
frame-setup SEH_SaveRegP 27, 28, 32
frame-setup STPXi killed $x25, killed $x26, $sp, 6 :: (store 8 into %stack.6), (store 8 into %stack.7)
frame-setup SEH_SaveRegP 25, 26, 48
frame-setup STPXi killed $x23, killed $x24, $sp, 8 :: (store 8 into %stack.4), (store 8 into %stack.5)
frame-setup SEH_SaveRegP 23, 24, 64
frame-setup STPXi killed $x21, killed $x22, $sp, 10 :: (store 8 into %stack.2), (store 8 into %stack.3)
frame-setup SEH_SaveRegP 21, 22, 80
frame-setup STPXi killed $x19, killed $x20, $sp, 12 :: (store 8 into %stack.0), (store 8 into %stack.1)
frame-setup SEH_SaveRegP 19, 20, 96
frame-setup SEH_PrologEnd
frame-setup CFI_INSTRUCTION def_cfa_offset 112
frame-setup CFI_INSTRUCTION offset $w19, -8
frame-setup CFI_INSTRUCTION offset $w20, -16
frame-setup CFI_INSTRUCTION offset $w21, -24
frame-setup CFI_INSTRUCTION offset $w22, -32
frame-setup CFI_INSTRUCTION offset $w23, -40
frame-setup CFI_INSTRUCTION offset $w24, -48
frame-setup CFI_INSTRUCTION offset $w25, -56
frame-setup CFI_INSTRUCTION offset $w26, -64
frame-setup CFI_INSTRUCTION offset $w27, -72
frame-setup CFI_INSTRUCTION offset $w28, -80
frame-setup CFI_INSTRUCTION offset $b8, -88
frame-setup CFI_INSTRUCTION offset $b9, -96
frame-setup CFI_INSTRUCTION offset $b10, -104
frame-setup CFI_INSTRUCTION offset $b11, -112
$x19 = ADDXrr $x0, killed $x1
$d8 = FADDDrr killed $d0, $d1
$d9 = FADDDrr $d8, $d1
$d10 = FADDDrr $d9, $d8
$d11 = FADDDrr killed $d9, $d10
$x20 = SUBSXrr $x19, killed $x0, implicit-def $nzcv
Bcc 1, %bb.2, implicit killed $nzcv
B %bb.1
bb.1:
liveins: $x19, $x20
$x21 = ADDXrr $x20, killed $x19
$x22 = ADDXrr $x21, killed $x20
$x23 = ADDXrr $x22, killed $x21
$x24 = ADDXrr $x23, killed $x22
$x25 = ADDXrr $x24, killed $x23
$x26 = ADDXrr $x25, killed $x24
$x27 = ADDXrr $x26, killed $x25
$x28 = ADDXrr $x27, killed $x26
$x0 = COPY $x28
frame-destroy SEH_EpilogStart
$x19, $x20 = frame-destroy LDPXi $sp, 12 :: (load 8 from %stack.0), (load 8 from %stack.1)
frame-destroy SEH_SaveRegP 19, 20, 96
$x21, $x22 = frame-destroy LDPXi $sp, 10 :: (load 8 from %stack.2), (load 8 from %stack.3)
frame-destroy SEH_SaveRegP 21, 22, 80
$x23, $x24 = frame-destroy LDPXi $sp, 8 :: (load 8 from %stack.4), (load 8 from %stack.5)
frame-destroy SEH_SaveRegP 23, 24, 64
$x25, $x26 = frame-destroy LDPXi $sp, 6 :: (load 8 from %stack.6), (load 8 from %stack.7)
frame-destroy SEH_SaveRegP 25, 26, 48
$x27, $x28 = frame-destroy LDPXi $sp, 4 :: (load 8 from %stack.8), (load 8 from %stack.9)
frame-destroy SEH_SaveRegP 27, 28, 32
$d8, $d9 = frame-destroy LDPDi $sp, 2 :: (load 8 from %stack.10), (load 8 from %stack.11)
frame-destroy SEH_SaveFRegP 8, 9, 16
early-clobber $sp, $d10, $d11 = frame-destroy LDPDpost $sp, 14 :: (load 8 from %stack.12), (load 8 from %stack.13)
frame-destroy SEH_SaveFRegP_X 10, 11, -112
frame-destroy SEH_EpilogEnd
RET_ReallyLR implicit $x0
bb.2:
liveins: $x28, $d11
$x0 = COPY $d11
$x0 = ADDXrr $x0, killed $x28
frame-destroy SEH_EpilogStart
$x19, $x20 = frame-destroy LDPXi $sp, 12 :: (load 8 from %stack.0), (load 8 from %stack.1)
frame-destroy SEH_SaveRegP 19, 20, 96
$x21, $x22 = frame-destroy LDPXi $sp, 10 :: (load 8 from %stack.2), (load 8 from %stack.3)
frame-destroy SEH_SaveRegP 21, 22, 80
$x23, $x24 = frame-destroy LDPXi $sp, 8 :: (load 8 from %stack.4), (load 8 from %stack.5)
frame-destroy SEH_SaveRegP 23, 24, 64
$x25, $x26 = frame-destroy LDPXi $sp, 6 :: (load 8 from %stack.6), (load 8 from %stack.7)
frame-destroy SEH_SaveRegP 25, 26, 48
$d8, $d9 = frame-destroy LDPDi $sp, 2 :: (load 8 from %stack.10), (load 8 from %stack.11)
frame-destroy SEH_SaveFRegP 8, 9, 16
early-clobber $sp, $d10, $d11 = frame-destroy LDPDpost $sp, 14 :: (load 8 from %stack.12), (load 8 from %stack.13)
frame-destroy SEH_SaveFRegP_X 10, 11, -112
frame-destroy SEH_EpilogEnd
RET_ReallyLR implicit $x0
...

View File

@ -0,0 +1,96 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
; FUNC-LABEL: {{^}}ds_ordered_add:
; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
; GCN-DAG: s_mov_b32 m0,
; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
store i32 %val, i32 addrspace(1)* %out
ret void
}
; Below are various modifications of input operands and shader types.
; FUNC-LABEL: {{^}}ds_ordered_add_counter2:
; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
; GCN-DAG: s_mov_b32 m0,
; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:776 gds
define amdgpu_kernel void @ds_ordered_add_counter2(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 2, i1 true, i1 true)
store i32 %val, i32 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}ds_ordered_add_nodone:
; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
; GCN-DAG: s_mov_b32 m0,
; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:260 gds
define amdgpu_kernel void @ds_ordered_add_nodone(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 false)
store i32 %val, i32 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}ds_ordered_add_norelease:
; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
; GCN-DAG: s_mov_b32 m0,
; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:4 gds
define amdgpu_kernel void @ds_ordered_add_norelease(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 false, i1 false)
store i32 %val, i32 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}ds_ordered_add_cs:
; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
; GCN: s_mov_b32 m0, s0
; VIGFX9-NEXT: s_nop 0
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
%r = bitcast i32 %val to float
ret float %r
}
; FUNC-LABEL: {{^}}ds_ordered_add_ps:
; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
; GCN: s_mov_b32 m0, s0
; VIGFX9-NEXT: s_nop 0
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:1796 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define amdgpu_ps float @ds_ordered_add_ps(i32 addrspace(2)* inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
%r = bitcast i32 %val to float
ret float %r
}
; FUNC-LABEL: {{^}}ds_ordered_add_vs:
; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
; GCN: s_mov_b32 m0, s0
; VIGFX9-NEXT: s_nop 0
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:2820 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define amdgpu_vs float @ds_ordered_add_vs(i32 addrspace(2)* inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
%r = bitcast i32 %val to float
ret float %r
}
; FUNC-LABEL: {{^}}ds_ordered_add_gs:
; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
; GCN: s_mov_b32 m0, s0
; VIGFX9-NEXT: s_nop 0
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:3844 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define amdgpu_gs float @ds_ordered_add_gs(i32 addrspace(2)* inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
%r = bitcast i32 %val to float
ret float %r
}
declare i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)

View File

@ -0,0 +1,45 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
; FUNC-LABEL: {{^}}ds_ordered_swap:
; GCN: s_mov_b32 m0, s0
; VIGFX9-NEXT: s_nop 0
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v0 offset:4868 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define amdgpu_cs float @ds_ordered_swap(i32 addrspace(2)* inreg %gds, i32 %value) {
%val = call i32@llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
%r = bitcast i32 %val to float
ret float %r
}
; FUNC-LABEL: {{^}}ds_ordered_swap_conditional:
; GCN: v_cmp_ne_u32_e32 vcc, 0, v0
; GCN: s_and_saveexec_b64 s[[SAVED:\[[0-9]+:[0-9]+\]]], vcc
; // We have to use s_cbranch, because ds_ordered_count has side effects with EXEC=0
; GCN: s_cbranch_execz [[BB:BB._.]]
; GCN: s_mov_b32 m0, s0
; VIGFX9-NEXT: s_nop 0
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v0 offset:4868 gds
; GCN-NEXT: [[BB]]:
; // Wait for expcnt(0) before modifying EXEC
; GCN-NEXT: s_waitcnt expcnt(0)
; GCN-NEXT: s_or_b64 exec, exec, s[[SAVED]]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
define amdgpu_cs float @ds_ordered_swap_conditional(i32 addrspace(2)* inreg %gds, i32 %value) {
entry:
%c = icmp ne i32 %value, 0
br i1 %c, label %if-true, label %endif
if-true:
%val = call i32@llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
br label %endif
endif:
%v = phi i32 [ %val, %if-true ], [ undef, %entry ]
%r = bitcast i32 %v to float
ret float %r
}
declare i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)

View File

@ -3,7 +3,7 @@
target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"
target triple = "msp430-unknown-linux-gnu"
define msp430_intrcc void @foo() nounwind {
define msp430_intrcc void @foo() nounwind #0 {
entry:
%fa = call i8* @llvm.frameaddress(i32 0)
store i8 0, i8* %fa
@ -11,3 +11,5 @@ entry:
}
declare i8* @llvm.frameaddress(i32)
attributes #0 = { noinline nounwind optnone "interrupt"="2" }

View File

@ -27,3 +27,5 @@ define msp430_intrcc void @fpb_alloced() #0 {
call void asm sideeffect "nop", "r"(i8 0)
ret void
}
attributes #0 = { noinline nounwind optnone "interrupt"="2" }

View File

@ -13,6 +13,9 @@ target triple = "msp430-generic-generic"
; instruction RETI, which restores the SR register and branches to the PC where
; the interrupt occurred.
; CHECK: .section __interrupt_vector_2,"ax",@progbits
; CHECK-NEXT: .short ISR
@g = global float 0.0
define msp430_intrcc void @ISR() #0 {
@ -47,3 +50,4 @@ entry:
ret void
}
attributes #0 = { noinline nounwind optnone "interrupt"="2" }

View File

@ -1183,38 +1183,58 @@ define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) {
define <4 x i32> @test_x86_avx2_psllv_d_const() {
; X86-AVX-LABEL: test_x86_avx2_psllv_d_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x58,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpaddd {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0x05,A,A,A,A]
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295]
; X86-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsllvd {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,4294967295]
; X86-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsllvd %xmm1, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x47,0xc9]
; X86-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = <4,9,0,u>
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpaddd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsllvd {{\.LCPI.*}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm1 # EVEX TO VEX Compression xmm1 = [1,1,1,4294967295]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsllvd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x47,0xc9]
; X86-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psllv_d_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x58,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0x05,A,A,A,A]
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295]
; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,4294967295]
; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsllvd %xmm1, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x47,0xc9]
; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = <4,9,0,u>
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm1 # EVEX TO VEX Compression xmm1 = [1,1,1,4294967295]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsllvd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x47,0xc9]
; X64-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
%res0 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> <i32 2, i32 9, i32 0, i32 -1>, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
%res1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> <i32 1, i32 1, i32 1, i32 -1>, <4 x i32> <i32 1, i32 1, i32 1, i32 -1>)
@ -1241,38 +1261,62 @@ define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
define <8 x i32> @test_x86_avx2_psllv_d_256_const() {
; X86-AVX-LABEL: test_x86_avx2_psllv_d_256_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8,8,8,8,8,8,8,8]
; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x58,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpaddd {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0x05,A,A,A,A]
; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X86-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsllvd {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X86-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsllvd {{\.LCPI.*}}, %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d_256_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = <4,9,0,u,12,7,u,0>
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsllvd {{\.LCPI.*}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsllvd {{\.LCPI.*}}, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psllv_d_256_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8,8,8,8,8,8,8,8]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x58,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0x05,A,A,A,A]
; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsllvd {{.*}}(%rip), %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d_256_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = <4,9,0,u,12,7,u,0>
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
%res0 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0>, <8 x i32> <i32 1, i32 0, i32 33, i32 -1,i32 2, i32 0, i32 34, i32 -2>)
%res1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
@ -1316,14 +1360,20 @@ define <2 x i64> @test_x86_avx2_psllv_q_const() {
;
; X64-AVX-LABEL: test_x86_avx2_psllv_q_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movl $8, %eax # encoding: [0xb8,0x08,0x00,0x00,0x00]
; X64-AVX-NEXT: vmovq %rax, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc0]
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4,18446744073709551615]
; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: movl $8, %eax # encoding: [0xb8,0x08,0x00,0x00,0x00]
; X64-AVX512VL-NEXT: vmovq %rax, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc0]
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,18446744073709551615]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> <i64 4, i64 -1>, <2 x i64> <i64 1, i64 -1>)
ret <2 x i64> %res
@ -1366,15 +1416,19 @@ define <4 x i64> @test_x86_avx2_psllv_q_256_const() {
;
; X64-AVX-LABEL: test_x86_avx2_psllv_q_256_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vbroadcastsd {{.*#+}} ymm0 = [8,8,8,8]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x19,0x05,A,A,A,A]
; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [4,4,4,18446744073709551615]
; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q_256_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [8,8,8,8]
; X64-AVX512VL-NEXT: # encoding: [0xc4,0xe2,0x7d,0x19,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,18446744073709551615]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> <i64 4, i64 4, i64 4, i64 -1>, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
@ -1400,38 +1454,62 @@ define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) {
define <4 x i32> @test_x86_avx2_psrlv_d_const() {
; X86-AVX-LABEL: test_x86_avx2_psrlv_d_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x58,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpaddd {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0x05,A,A,A,A]
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295]
; X86-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsrlvd {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4294967295]
; X86-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsrlvd {{\.LCPI.*}}, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = <1,9,0,u>
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpaddd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsrlvd {{\.LCPI.*}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm1 # EVEX TO VEX Compression xmm1 = [4,4,4,4294967295]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsrlvd {{\.LCPI.*}}, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_d_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x58,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0x05,A,A,A,A]
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295]
; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4294967295]
; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = <1,9,0,u>
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm1 # EVEX TO VEX Compression xmm1 = [4,4,4,4294967295]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
%res0 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> <i32 2, i32 9, i32 0, i32 -1>, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
%res1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> <i32 4, i32 4, i32 4, i32 -1>, <4 x i32> <i32 1, i32 1, i32 1, i32 -1>)
@ -1458,38 +1536,62 @@ define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
define <8 x i32> @test_x86_avx2_psrlv_d_256_const() {
; X86-AVX-LABEL: test_x86_avx2_psrlv_d_256_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x58,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpaddd {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0x05,A,A,A,A]
; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X86-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsrlvd {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X86-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsrlvd {{\.LCPI.*}}, %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = <1,9,0,u,0,7,u,0>
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsrlvd {{\.LCPI.*}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295]
; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsrlvd {{\.LCPI.*}}, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_d_256_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x58,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0x05,A,A,A,A]
; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = <1,9,0,u,0,7,u,0>
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
%res0 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0>, <8 x i32> <i32 1, i32 0, i32 33, i32 -1,i32 2, i32 0, i32 34, i32 -2>)
%res1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
@ -1534,14 +1636,20 @@ define <2 x i64> @test_x86_avx2_psrlv_q_const() {
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_q_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00]
; X64-AVX-NEXT: vmovq %rax, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc0]
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4,4]
; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00]
; X64-AVX512VL-NEXT: vmovq %rax, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc0]
; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4]
; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> <i64 4, i64 4>, <2 x i64> <i64 1, i64 -1>)
ret <2 x i64> %res
@ -1585,15 +1693,19 @@ define <4 x i64> @test_x86_avx2_psrlv_q_256_const() {
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_q_256_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vbroadcastsd {{.*#+}} ymm0 = [2,2,2,2]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x19,0x05,A,A,A,A]
; X64-AVX-NEXT: vpbroadcastq {{.*#+}} ymm0 = [4,4,4,4]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x59,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q_256_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,2,2,2]
; X64-AVX512VL-NEXT: # encoding: [0xc4,0xe2,0x7d,0x19,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4]
; X64-AVX512VL-NEXT: # encoding: [0xc4,0xe2,0x7d,0x59,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
%res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> <i64 4, i64 4, i64 4, i64 4>, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)

View File

@ -5229,8 +5229,11 @@ define <16 x i32> @test_x86_avx512_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1) {
define <16 x i32> @test_x86_avx512_psllv_d_512_const() {
; CHECK-LABEL: test_x86_avx512_psllv_d_512_const:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = <4,9,0,u,12,7,u,0,32,5,u,0,80,3,u,0>
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,0,4294967295,3,7,4294967295,0,4,5,4294967294,0,5,3,4294967293,0]
; CHECK-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4294967295]
; CHECK-NEXT: vpsllvd {{.*}}(%rip), %zmm1, %zmm1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%res0 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0, i32 4, i32 5, i32 -2, i32 0, i32 5, i32 3, i32 -3, i32 0>, <16 x i32> <i32 1, i32 0, i32 33, i32 -1,i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
%res1 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
@ -5277,8 +5280,11 @@ define <8 x i64> @test_x86_avx512_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1) {
define <8 x i64> @test_x86_avx512_psllv_q_512_const() {
; CHECK-LABEL: test_x86_avx512_psllv_q_512_const:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = <4,9,0,u,12,7,18446744056529682432,0>
; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,0,18446744073709551615,3,7,18446744073709551615,0]
; CHECK-NEXT: vpsllvq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,4,4,4,4,4,4,18446744073709551615]
; CHECK-NEXT: vpsllvq {{.*}}(%rip), %zmm1, %zmm1
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%res0 = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> <i64 2, i64 9, i64 0, i64 -1, i64 3, i64 7, i64 -1, i64 0>, <8 x i64> <i64 1, i64 0, i64 33, i64 -1,i64 2, i64 0, i64 34, i64 -2>)
%res1 = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 -1>, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 -1>)
@ -5397,8 +5403,11 @@ define <16 x i32> @test_x86_avx512_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1) {
define <16 x i32> @test_x86_avx512_psrlv_d_512_const() {
; CHECK-LABEL: test_x86_avx512_psrlv_d_512_const:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = <1,9,0,u,0,7,u,0,0,5,u,0,0,3,u,0>
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,0,4294967295,3,7,4294967295,0,4,5,4294967294,0,5,3,4294967293,0]
; CHECK-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4294967295]
; CHECK-NEXT: vpsrlvd {{.*}}(%rip), %zmm1, %zmm1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%res0 = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0, i32 4, i32 5, i32 -2, i32 0, i32 5, i32 3, i32 -3, i32 0>, <16 x i32> <i32 1, i32 0, i32 33, i32 -1,i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
%res1 = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1 >)
@ -5445,8 +5454,11 @@ define <8 x i64> @test_x86_avx512_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1) {
define <8 x i64> @test_x86_avx512_psrlv_q_512_const() {
; CHECK-LABEL: test_x86_avx512_psrlv_q_512_const:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = <1,9,0,u,0,7,1073741823,0>
; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,0,18446744073709551615,3,7,18446744073709551615,0]
; CHECK-NEXT: vpsrlvq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,4,4,4,4,4,4,18446744073709551615]
; CHECK-NEXT: vpsrlvq {{.*}}(%rip), %zmm1, %zmm1
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%res0 = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> <i64 2, i64 9, i64 0, i64 -1, i64 3, i64 7, i64 -1, i64 0>, <8 x i64> <i64 1, i64 0, i64 33, i64 -1,i64 2, i64 0, i64 34, i64 -2>)
%res1 = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 -1>, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 -1>)

View File

@ -1158,15 +1158,19 @@ declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) nounwind
define <32 x i16> @test_x86_avx512_psrlv_w_512_const() optsize {
; X86-LABEL: test_x86_avx512_psrlv_w_512_const:
; X86: # %bb.0:
; X86-NEXT: vpbroadcastw {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; X86-NEXT: # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x05,A,A,A,A]
; X86-NEXT: vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X86-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: vpsrlvw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_psrlv_w_512_const:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastw {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; X64-NEXT: # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x05,A,A,A,A]
; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X64-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: retq # encoding: [0xc3]
%res1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
@ -1377,15 +1381,19 @@ declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) nounwind r
define <32 x i16> @test_x86_avx512_psllv_w_512_const() optsize {
; X86-LABEL: test_x86_avx512_psllv_w_512_const:
; X86: # %bb.0:
; X86-NEXT: vpbroadcastw {{.*#+}} zmm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; X86-NEXT: # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x05,A,A,A,A]
; X86-NEXT: vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X86-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: vpsllvw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_psllv_w_512_const:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastw {{.*#+}} zmm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; X64-NEXT: # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x05,A,A,A,A]
; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X64-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: retq # encoding: [0xc3]
%res1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)

View File

@ -2021,16 +2021,20 @@ define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1
define <8 x i16> @test_int_x86_avx512_psrlv_w_128_const() optsize {
; X86-LABEL: test_int_x86_avx512_psrlv_w_128_const:
; X86: # %bb.0:
; X86-NEXT: vpbroadcastw {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,2,2,2,2,2,2,2]
; X86-NEXT: # encoding: [0xc4,0xe2,0x79,0x79,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
; X86-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: vpsrlvw {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psrlv_w_128_const:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastw {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,2,2,2,2,2,2,2]
; X64-NEXT: # encoding: [0xc4,0xe2,0x79,0x79,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
; X64-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vpsrlvw {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
ret <8 x i16> %res
@ -2041,16 +2045,20 @@ declare <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16>, <8 x i16>)
define <16 x i16> @test_int_x86_avx512_psrlv_w_256_const() optsize {
; X86-LABEL: test_int_x86_avx512_psrlv_w_256_const:
; X86: # %bb.0:
; X86-NEXT: vpbroadcastw {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; X86-NEXT: # encoding: [0xc4,0xe2,0x7d,0x79,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X86-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: vpsrlvw {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psrlv_w_256_const:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastw {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; X64-NEXT: # encoding: [0xc4,0xe2,0x7d,0x79,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X64-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vpsrlvw {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: retq # encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
ret <16 x i16> %res
@ -2195,16 +2203,20 @@ define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1
define <8 x i16> @test_int_x86_avx512_psllv_w_128_const() optsize {
; X86-LABEL: test_int_x86_avx512_psllv_w_128_const:
; X86: # %bb.0:
; X86-NEXT: vpbroadcastw {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [8,8,8,8,8,8,8,8]
; X86-NEXT: # encoding: [0xc4,0xe2,0x79,0x79,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
; X86-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: vpsllvw {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psllv_w_128_const:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastw {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [8,8,8,8,8,8,8,8]
; X64-NEXT: # encoding: [0xc4,0xe2,0x79,0x79,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
; X64-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vpsllvw {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
ret <8 x i16> %res
@ -2216,16 +2228,20 @@ declare <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16>, <8 x i16>)
define <16 x i16> @test_int_x86_avx512_psllv_w_256_const() optsize {
; X86-LABEL: test_int_x86_avx512_psllv_w_256_const:
; X86: # %bb.0:
; X86-NEXT: vpbroadcastw {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; X86-NEXT: # encoding: [0xc4,0xe2,0x7d,0x79,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X86-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: vpsllvw {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psllv_w_256_const:
; X64: # %bb.0:
; X64-NEXT: vpbroadcastw {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; X64-NEXT: # encoding: [0xc4,0xe2,0x7d,0x79,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X64-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: retq # encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
ret <16 x i16> %res

View File

@ -0,0 +1,52 @@
; RUN: opt -instcombine -S < %s | FileCheck %s
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i686-unknown-linux-gnu"
; Check that instcombine doesn't sink dynamic allocas across llvm.stacksave.
; Helper to generate branch conditions.
declare i1 @cond()
declare i32* @use_and_return(i32*)
declare i8* @llvm.stacksave() #0
declare void @llvm.stackrestore(i8*) #0
define void @foo(i32 %x) {
entry:
%c1 = call i1 @cond()
br i1 %c1, label %ret, label %nonentry
nonentry: ; preds = %entry
%argmem = alloca i32, i32 %x, align 4
%sp = call i8* @llvm.stacksave()
%c2 = call i1 @cond()
br i1 %c2, label %ret, label %sinktarget
sinktarget: ; preds = %nonentry
; Arrange for there to be a single use of %argmem by returning it.
%p = call i32* @use_and_return(i32* nonnull %argmem)
store i32 13, i32* %p, align 4
call void @llvm.stackrestore(i8* %sp)
%0 = call i32* @use_and_return(i32* %p)
br label %ret
ret: ; preds = %sinktarget, %nonentry, %entry
ret void
}
; CHECK-LABEL: define void @foo(i32 %x)
; CHECK: nonentry:
; CHECK: %argmem = alloca i32, i32 %x
; CHECK: %sp = call i8* @llvm.stacksave()
; CHECK: %c2 = call i1 @cond()
; CHECK: br i1 %c2, label %ret, label %sinktarget
; CHECK: sinktarget:
; CHECK: %p = call i32* @use_and_return(i32* nonnull %argmem)
; CHECK: store i32 13, i32* %p
; CHECK: call void @llvm.stackrestore(i8* %sp)
; CHECK: %0 = call i32* @use_and_return(i32* %p)
attributes #0 = { nounwind }

View File

@ -3,93 +3,185 @@
; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-8 -slp-min-tree-size=6 | FileCheck %s --check-prefixes=ALL,FORCE_REDUCTION
define void @Test(i32) {
; ALL-LABEL: @Test(
; ALL-NEXT: entry:
; ALL-NEXT: br label [[LOOP:%.*]]
; ALL: loop:
; ALL-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
; ALL-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; ALL-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1
; ALL-NEXT: [[TMP3:%.*]] = add <8 x i32> <i32 0, i32 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685>, [[SHUFFLE]]
; ALL-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef
; ALL-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
; ALL-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
; ALL-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
; ALL-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
; ALL-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
; ALL-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
; ALL-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
; ALL-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
; ALL-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
; ALL-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
; ALL-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
; ALL-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
; ALL-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
; ALL-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
; ALL-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
; ALL-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], undef
; ALL-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
; ALL-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
; ALL-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
; ALL-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
; ALL-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
; ALL-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
; ALL-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
; ALL-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
; ALL-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
; ALL-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
; ALL-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
; ALL-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
; ALL-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], undef
; ALL-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
; ALL-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
; ALL-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
; ALL-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], undef
; ALL-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_40]], i32 0
; ALL-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP2]], i32 1
; ALL-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7
; ALL-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP6]], i32 0
; ALL-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 14910, i32 1
; ALL-NEXT: [[TMP9:%.*]] = and <2 x i32> [[TMP5]], [[TMP8]]
; ALL-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP5]], [[TMP8]]
; ALL-NEXT: [[TMP11]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> <i32 0, i32 3>
; ALL-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; ALL-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]]
; ALL-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; ALL-NEXT: [[BIN_RDX2:%.*]] = and <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
; ALL-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; ALL-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
; ALL-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
; ALL-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP12]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]]
; ALL-NEXT: [[OP_EXTRA31:%.*]] = and i32 [[OP_EXTRA30]], [[TMP2]]
; ALL-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP11]], i32 0
; ALL-NEXT: br label [[LOOP]]
; CHECK-LABEL: @Test(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP15:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> <i32 0, i32 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685>, [[SHUFFLE]]
; CHECK-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef
; CHECK-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
; CHECK-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
; CHECK-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
; CHECK-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
; CHECK-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
; CHECK-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
; CHECK-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
; CHECK-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
; CHECK-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
; CHECK-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
; CHECK-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
; CHECK-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
; CHECK-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
; CHECK-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
; CHECK-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
; CHECK-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], undef
; CHECK-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
; CHECK-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
; CHECK-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
; CHECK-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
; CHECK-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
; CHECK-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
; CHECK-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
; CHECK-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
; CHECK-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
; CHECK-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
; CHECK-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
; CHECK-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
; CHECK-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], undef
; CHECK-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
; CHECK-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
; CHECK-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
; CHECK-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], undef
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = and <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]]
; CHECK-NEXT: [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]]
; CHECK-NEXT: [[VAL_42:%.*]] = and i32 [[VAL_40]], undef
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[OP_EXTRA30]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP2]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 14910, i32 1
; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i32> [[TMP6]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP6]], [[TMP8]]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP11]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> undef, i32 [[TMP12]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1
; CHECK-NEXT: [[TMP15]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP14]], i32 1
; CHECK-NEXT: br label [[LOOP]]
;
; FORCE_REDUCTION-LABEL: @Test(
; FORCE_REDUCTION-NEXT: entry:
; FORCE_REDUCTION-NEXT: br label [[LOOP:%.*]]
; FORCE_REDUCTION: loop:
; FORCE_REDUCTION-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP13:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
; FORCE_REDUCTION-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
; FORCE_REDUCTION-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1
; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = add <4 x i32> <i32 0, i32 55, i32 285, i32 1240>, [[SHUFFLE]]
; FORCE_REDUCTION-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef
; FORCE_REDUCTION-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
; FORCE_REDUCTION-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
; FORCE_REDUCTION-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
; FORCE_REDUCTION-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
; FORCE_REDUCTION-NEXT: [[VAL_20:%.*]] = add i32 [[TMP2]], 1496
; FORCE_REDUCTION-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], [[VAL_20]]
; FORCE_REDUCTION-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_34:%.*]] = add i32 [[TMP2]], 8555
; FORCE_REDUCTION-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], [[VAL_34]]
; FORCE_REDUCTION-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; FORCE_REDUCTION-NEXT: [[BIN_RDX:%.*]] = and <4 x i32> [[TMP3]], [[RDX_SHUF]]
; FORCE_REDUCTION-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; FORCE_REDUCTION-NEXT: [[BIN_RDX2:%.*]] = and <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
; FORCE_REDUCTION-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
; FORCE_REDUCTION-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], [[VAL_20]]
; FORCE_REDUCTION-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], [[VAL_34]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA3:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA4:%.*]] = and i32 [[OP_EXTRA3]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA4]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA8:%.*]] = and i32 [[OP_EXTRA7]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA9:%.*]] = and i32 [[OP_EXTRA8]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA10:%.*]] = and i32 [[OP_EXTRA9]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA11:%.*]] = and i32 [[OP_EXTRA10]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA12:%.*]] = and i32 [[OP_EXTRA11]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA13:%.*]] = and i32 [[OP_EXTRA12]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA14:%.*]] = and i32 [[OP_EXTRA13]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA15:%.*]] = and i32 [[OP_EXTRA14]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA16:%.*]] = and i32 [[OP_EXTRA15]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA17:%.*]] = and i32 [[OP_EXTRA16]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA18:%.*]] = and i32 [[OP_EXTRA17]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA19:%.*]] = and i32 [[OP_EXTRA18]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA20:%.*]] = and i32 [[OP_EXTRA19]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA21:%.*]] = and i32 [[OP_EXTRA20]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA22:%.*]] = and i32 [[OP_EXTRA21]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA23:%.*]] = and i32 [[OP_EXTRA22]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP2]]
; FORCE_REDUCTION-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
; FORCE_REDUCTION-NEXT: [[VAL_39:%.*]] = add i32 [[TMP2]], 12529
; FORCE_REDUCTION-NEXT: [[VAL_40:%.*]] = and i32 [[OP_EXTRA29]], [[VAL_39]]
; FORCE_REDUCTION-NEXT: [[VAL_41:%.*]] = add i32 [[TMP2]], 13685
; FORCE_REDUCTION-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_40]], i32 0
; FORCE_REDUCTION-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP2]], i32 1
; FORCE_REDUCTION-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> undef, i32 [[VAL_41]], i32 0
; FORCE_REDUCTION-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 14910, i32 1
; FORCE_REDUCTION-NEXT: [[TMP11:%.*]] = and <2 x i32> [[TMP8]], [[TMP10]]
; FORCE_REDUCTION-NEXT: [[TMP12:%.*]] = add <2 x i32> [[TMP8]], [[TMP10]]
; FORCE_REDUCTION-NEXT: [[TMP13]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> <i32 0, i32 3>
; FORCE_REDUCTION-NEXT: br label [[LOOP]]
;
entry:
br label %loop

View File

@ -7,7 +7,7 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) {
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 31, i32 undef>, i32 [[PARAM:%.*]], i32 1
; CHECK-NEXT: br label [[BCI_15:%.*]]
; CHECK: bci_15:
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP7:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER:%.*]] ]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 15
@ -28,13 +28,6 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) {
; CHECK-NEXT: [[V38:%.*]] = and i32 undef, [[V36]]
; CHECK-NEXT: [[V40:%.*]] = and i32 undef, [[V38]]
; CHECK-NEXT: [[V42:%.*]] = and i32 undef, [[V40]]
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP6]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> <i32 16, i32 undef>, i32 [[V42]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = and <2 x i32> [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[TMP11]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = and <16 x i32> [[TMP4]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[BIN_RDX]], <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@ -43,9 +36,12 @@ define void @mainTest(i32 %param, i32 * %vals, i32 %len) {
; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <16 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x i32> [[BIN_RDX4]], <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX6:%.*]] = and <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0
; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP12]], [[TMP2]]
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0
; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[V43:%.*]] = and i32 undef, [[V42]]
; CHECK-NEXT: [[V44:%.*]] = add i32 [[TMP2]], 16
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> undef, i32 [[V44]], i32 0
; CHECK-NEXT: [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32 [[OP_EXTRA]], i32 1
; CHECK-NEXT: br i1 true, label [[BCI_15]], label [[LOOPEXIT:%.*]]
; CHECK: loopexit:
; CHECK-NEXT: ret void

View File

@ -1745,6 +1745,55 @@ entry:
ret void
}
declare void @llvm.lifetime.start.isVoid.i64.p0i8(i64, [10 x float]* nocapture)
declare void @llvm.lifetime.end.isVoid.i64.p0i8(i64, [10 x float]* nocapture)
@array = dso_local global [10 x float] undef, align 4
define void @test29(i32 %num, i32 %tid) {
; CHECK-LABEL: @test29(
; CHECK-NOT: alloca [10 x float]
; CHECK: ret void
entry:
%ra = alloca [10 x float], align 4
call void @llvm.lifetime.start.isVoid.i64.p0i8(i64 40, [10 x float]* nonnull %ra)
%cmp1 = icmp sgt i32 %num, 0
br i1 %cmp1, label %bb1, label %bb7
bb1:
%tobool = icmp eq i32 %tid, 0
%conv.i = zext i32 %tid to i64
%0 = bitcast [10 x float]* %ra to i32*
%1 = load i32, i32* %0, align 4
%arrayidx5 = getelementptr inbounds [10 x float], [10 x float]* @array, i64 0, i64 %conv.i
%2 = bitcast float* %arrayidx5 to i32*
br label %bb2
bb2:
%i.02 = phi i32 [ %num, %bb1 ], [ %sub, %bb5 ]
br i1 %tobool, label %bb3, label %bb4
bb3:
br label %bb5
bb4:
store i32 %1, i32* %2, align 4
br label %bb5
bb5:
%sub = add i32 %i.02, -1
%cmp = icmp sgt i32 %sub, 0
br i1 %cmp, label %bb2, label %bb6
bb6:
br label %bb7
bb7:
call void @llvm.lifetime.end.isVoid.i64.p0i8(i64 40, [10 x float]* nonnull %ra)
ret void
}
!0 = !{!1, !1, i64 0, i64 1}
!1 = !{!2, i64 1, !"type_0"}
!2 = !{!"root"}

View File

@ -54,7 +54,7 @@ svn.exe export -r %revision% http://llvm.org/svn/llvm-project/lldb/%branch% llvm
REM Setting CMAKE_CL_SHOWINCLUDES_PREFIX to work around PR27226.
set cmake_flags=-DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON -DLLVM_INSTALL_TOOLCHAIN_ONLY=ON -DCMAKE_INSTALL_UCRT_LIBRARIES=ON -DCLANG_FORMAT_VS_VERSION=%clang_format_vs_version% -DPACKAGE_VERSION=%package_version% -DLLDB_RELOCATABLE_PYTHON=1 -DLLDB_TEST_COMPILER=%cd%\build32_stage0\bin\clang.exe -DCMAKE_CL_SHOWINCLUDES_PREFIX="Note: including file: "
REM TODO: Run all tests, including lld and compiler-rt.
REM TODO: Run the "check-all" tests.
set "VSCMD_START_DIR=%CD%"
call "%vsdevcmd%" -arch=x86
@ -66,7 +66,9 @@ REM Work around VS2017 bug by using MinSizeRel.
cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python32_dir% -DCMAKE_BUILD_TYPE=MinSizeRel ..\llvm || exit /b
ninja all || ninja all || ninja all || exit /b
ninja check || ninja check || ninja check || exit /b
ninja check-clang || ninja check-clang || ninja check-clang || exit /b
ninja check-clang || ninja check-clang || ninja check-clang || exit /b
ninja check-lld || ninja check-lld || ninja check-lld || exit /b
ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b
cd..
mkdir build32
@ -76,7 +78,9 @@ set CXX=..\build32_stage0\bin\clang-cl
cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python32_dir% ..\llvm || exit /b
ninja all || ninja all || ninja all || exit /b
ninja check || ninja check || ninja check || exit /b
ninja check-clang || ninja check-clang || ninja check-clang || exit /b
ninja check-clang || ninja check-clang || ninja check-clang || exit /b
ninja check-lld || ninja check-lld || ninja check-lld || exit /b
ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b
ninja package || exit /b
cd ..
@ -101,7 +105,9 @@ REM Work around VS2017 bug by using MinSizeRel.
cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python64_dir% -DCMAKE_BUILD_TYPE=MinSizeRel ..\llvm || exit /b
ninja all || ninja all || ninja all || exit /b
ninja check || ninja check || ninja check || exit /b
ninja check-clang || ninja check-clang || ninja check-clang || exit /b
ninja check-clang || ninja check-clang || ninja check-clang || exit /b
ninja check-lld || ninja check-lld || ninja check-lld || exit /b
ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b
cd..
mkdir build64
@ -111,6 +117,8 @@ set CXX=..\build64_stage0\bin\clang-cl
cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python64_dir% ..\llvm || exit /b
ninja all || ninja all || ninja all || exit /b
ninja check || ninja check || ninja check || exit /b
ninja check-clang || ninja check-clang || ninja check-clang || exit /b
ninja check-clang || ninja check-clang || ninja check-clang || exit /b
ninja check-lld || ninja check-lld || ninja check-lld || exit /b
ninja check-sanitizer || ninja check-sanitizer || ninja check-sanitizer || exit /b
ninja package || exit /b
cd ..