Vendor import of llvm release_60 branch r323338:

https://llvm.org/svn/llvm-project/llvm/branches/release_60@323338
Dimitry Andric 2018-01-24 20:23:48 +00:00
parent d215fd3b74
commit a096e0bdf6
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/vendor/llvm/dist-release_60/; revision=328362
svn path=/vendor/llvm/llvm-release_60-r323338/; revision=328363; tag=vendor/llvm/llvm-release_60-r323338
74 changed files with 2673 additions and 593 deletions

View File

@ -37,6 +37,8 @@ set(LLVM_ENABLE_THREADS @LLVM_ENABLE_THREADS@)
set(LLVM_ENABLE_ZLIB @LLVM_ENABLE_ZLIB@)
set(LLVM_LIBXML2_ENABLED @LLVM_LIBXML2_ENABLED@)
set(LLVM_ENABLE_DIA_SDK @LLVM_ENABLE_DIA_SDK@)
set(LLVM_NATIVE_ARCH @LLVM_NATIVE_ARCH@)

View File

@ -54,6 +54,8 @@ Non-comprehensive list of changes in this release
``DIVariables`` to the instructions in a ``Module``. The ``CheckDebugify``
pass determines how much of the metadata is lost.
* Significantly improved quality of CodeView debug info for Windows.
* Note..
.. NOTE
@ -69,10 +71,13 @@ Non-comprehensive list of changes in this release
Changes to the LLVM IR
----------------------
Changes to the ARM Backend
--------------------------
Changes to the ARM Target
-------------------------
During this release ...
During this release the ARM target has:
* Got support for enabling SjLj exception handling on platforms where it
isn't the default.
Changes to the MIPS Target
@ -89,7 +94,10 @@ Changes to the PowerPC Target
Changes to the X86 Target
-------------------------
During this release ...
During this release the X86 target has:
* Got support for enabling SjLj exception handling on platforms where it
isn't the default.
Changes to the AMDGPU Target
-----------------------------
@ -116,8 +124,46 @@ Changes to the C API
External Open Source Projects Using LLVM 6
==========================================
* A project...
JFS - JIT Fuzzing Solver
------------------------
`JFS <https://github.com/delcypher/jfs>`_ is an experimental constraint solver
designed to investigate using coverage-guided fuzzing as an incomplete strategy
for solving Boolean, BitVector, and floating-point constraints.
It is built on top of LLVM, Clang, LibFuzzer, and Z3.
The solver works by generating a C++ program where the reachability of an
`abort()` statement is equivalent to finding a satisfying assignment to the
constraints. This program is then compiled by Clang with `SanitizerCoverage
<https://releases.llvm.org/6.0.0/tools/clang/docs/SanitizerCoverage.html>`_
instrumentation and then fuzzed using :doc:`LibFuzzer <LibFuzzer>`.
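
For illustration, here is a minimal sketch of the kind of program such a
solver might generate; the constraint and all names here are hypothetical,
not actual JFS output. Reaching ``abort()`` is equivalent to satisfying the
constraint, and the fuzzer searches for input bytes that get there.

.. code-block:: c++

  #include <cstddef>
  #include <cstdint>
  #include <cstdlib>
  #include <cstring>

  // Hypothetical encoding of the constraint "x * 3 == 42 && x > 10" over a
  // 32-bit BitVector. Build with -fsanitize=fuzzer so LibFuzzer drives this
  // entry point; SanitizerCoverage feedback steers it toward abort().
  extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
    if (Size < sizeof(uint32_t))
      return 0; // Not enough bytes to form a candidate assignment for x.
    uint32_t X;
    std::memcpy(&X, Data, sizeof(X));
    if (X * 3 == 42 && X > 10)
      abort(); // Reachable iff the constraint is satisfiable (here, x == 14).
    return 0;
  }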
Zig Programming Language
------------------------
`Zig <http://ziglang.org>`_ is an open-source programming language designed
for robustness, optimality, and clarity. It is intended to replace C. It
provides high-level features such as Generics,
Compile Time Function Execution, and Partial Evaluation, yet exposes low-level
LLVM IR features such as Aliases. Zig uses Clang to provide automatic
import of .h symbols - even inline functions and macros. Zig uses LLD combined
with lazily building compiler-rt to provide out-of-the-box cross-compiling for
all supported targets.
LDC - the LLVM-based D compiler
-------------------------------
`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It
pragmatically combines efficiency, control, and modeling power, with safety and
programmer productivity. D supports powerful concepts like Compile-Time Function
Execution (CTFE) and Template Meta-Programming, provides an innovative approach
to concurrency and offers many classical paradigms.
`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler
combined with LLVM as the backend to produce efficient native code. LDC targets
x86/x86_64 systems like Linux, OS X, FreeBSD and Windows, and also Linux on ARM
and PowerPC (32/64 bit). Ports to other architectures like AArch64 and MIPS64
are underway.
Additional Information
======================

View File

@ -254,23 +254,23 @@ std::string RegionBase<Tr>::getNameStr() const {
template <class Tr>
void RegionBase<Tr>::verifyBBInRegion(BlockT *BB) const {
if (!contains(BB))
llvm_unreachable("Broken region found: enumerated BB not in region!");
report_fatal_error("Broken region found: enumerated BB not in region!");
BlockT *entry = getEntry(), *exit = getExit();
for (BlockT *Succ :
make_range(BlockTraits::child_begin(BB), BlockTraits::child_end(BB))) {
if (!contains(Succ) && exit != Succ)
llvm_unreachable("Broken region found: edges leaving the region must go "
"to the exit node!");
report_fatal_error("Broken region found: edges leaving the region must go "
"to the exit node!");
}
if (entry != BB) {
for (BlockT *Pred : make_range(InvBlockTraits::child_begin(BB),
InvBlockTraits::child_end(BB))) {
if (!contains(Pred))
llvm_unreachable("Broken region found: edges entering the region must "
"go to the entry node!");
report_fatal_error("Broken region found: edges entering the region must "
"go to the entry node!");
}
}
}
@ -557,7 +557,7 @@ void RegionInfoBase<Tr>::verifyBBMap(const RegionT *R) const {
} else {
BlockT *BB = Element->template getNodeAs<BlockT>();
if (getRegionFor(BB) != R)
llvm_unreachable("BB map does not match region nesting");
report_fatal_error("BB map does not match region nesting");
}
}
}

View File

@ -56,7 +56,7 @@ class BaseIndexOffset {
int64_t &Off);
/// Parses tree in Ptr for base, index, offset addresses.
static BaseIndexOffset match(SDValue Ptr, const SelectionDAG &DAG);
static BaseIndexOffset match(LSBaseSDNode *N, const SelectionDAG &DAG);
};
} // end namespace llvm

View File

@ -177,13 +177,7 @@ class CodeViewContext {
unsigned IACol);
/// Retrieve the function info if this is a valid function id, or nullptr.
MCCVFunctionInfo *getCVFunctionInfo(unsigned FuncId) {
if (FuncId >= Functions.size())
return nullptr;
if (Functions[FuncId].isUnallocatedFunctionInfo())
return nullptr;
return &Functions[FuncId];
}
MCCVFunctionInfo *getCVFunctionInfo(unsigned FuncId);
/// Saves the information from the currently parsed .cv_loc directive
/// and sets CVLocSeen. When the next instruction is assembled an entry
@ -199,50 +193,22 @@ class CodeViewContext {
CurrentCVLoc.setIsStmt(IsStmt);
CVLocSeen = true;
}
void clearCVLocSeen() { CVLocSeen = false; }
bool getCVLocSeen() { return CVLocSeen; }
void clearCVLocSeen() { CVLocSeen = false; }
const MCCVLoc &getCurrentCVLoc() { return CurrentCVLoc; }
bool isValidCVFileNumber(unsigned FileNumber);
/// \brief Add a line entry.
void addLineEntry(const MCCVLineEntry &LineEntry) {
size_t Offset = MCCVLines.size();
auto I = MCCVLineStartStop.insert(
{LineEntry.getFunctionId(), {Offset, Offset + 1}});
if (!I.second)
I.first->second.second = Offset + 1;
MCCVLines.push_back(LineEntry);
}
void addLineEntry(const MCCVLineEntry &LineEntry);
std::vector<MCCVLineEntry> getFunctionLineEntries(unsigned FuncId) {
std::vector<MCCVLineEntry> FilteredLines;
std::vector<MCCVLineEntry> getFunctionLineEntries(unsigned FuncId);
auto I = MCCVLineStartStop.find(FuncId);
if (I != MCCVLineStartStop.end())
for (size_t Idx = I->second.first, End = I->second.second; Idx != End;
++Idx)
if (MCCVLines[Idx].getFunctionId() == FuncId)
FilteredLines.push_back(MCCVLines[Idx]);
return FilteredLines;
}
std::pair<size_t, size_t> getLineExtent(unsigned FuncId);
std::pair<size_t, size_t> getLineExtent(unsigned FuncId) {
auto I = MCCVLineStartStop.find(FuncId);
// Return an empty extent if there are no cv_locs for this function id.
if (I == MCCVLineStartStop.end())
return {~0ULL, 0};
return I->second;
}
ArrayRef<MCCVLineEntry> getLinesForExtent(size_t L, size_t R) {
if (R <= L)
return None;
if (L >= MCCVLines.size())
return None;
return makeArrayRef(&MCCVLines[L], R - L);
}
ArrayRef<MCCVLineEntry> getLinesForExtent(size_t L, size_t R);
/// Emits a line table substream.
void emitLineTableForFunction(MCObjectStreamer &OS, unsigned FuncId,

View File

@ -628,7 +628,7 @@ struct SemiNCAInfo {
DecreasingLevel>
Bucket; // Queue of tree nodes sorted by level in descending order.
SmallDenseSet<TreeNodePtr, 8> Affected;
SmallDenseSet<TreeNodePtr, 8> Visited;
SmallDenseMap<TreeNodePtr, unsigned, 8> Visited;
SmallVector<TreeNodePtr, 8> AffectedQueue;
SmallVector<TreeNodePtr, 8> VisitedNotAffectedQueue;
};
@ -706,7 +706,7 @@ struct SemiNCAInfo {
// algorithm does not really know or use the set of roots and can make a
// different (implicit) decision about which nodes within an infinite loop
// become roots.
if (DT.isVirtualRoot(TN->getIDom())) {
if (TN && !DT.isVirtualRoot(TN->getIDom())) {
DEBUG(dbgs() << "Root " << BlockNamePrinter(R)
<< " is not virtual root's child\n"
<< "The entire tree needs to be rebuilt\n");
@ -753,14 +753,16 @@ struct SemiNCAInfo {
while (!II.Bucket.empty()) {
const TreeNodePtr CurrentNode = II.Bucket.top().second;
const unsigned CurrentLevel = CurrentNode->getLevel();
II.Bucket.pop();
DEBUG(dbgs() << "\tAdding to Visited and AffectedQueue: "
<< BlockNamePrinter(CurrentNode) << "\n");
II.Visited.insert(CurrentNode);
II.Visited.insert({CurrentNode, CurrentLevel});
II.AffectedQueue.push_back(CurrentNode);
// Discover and collect affected successors of the current node.
VisitInsertion(DT, BUI, CurrentNode, CurrentNode->getLevel(), NCD, II);
VisitInsertion(DT, BUI, CurrentNode, CurrentLevel, NCD, II);
}
// Finish by updating immediate dominators and levels.
@ -772,13 +774,17 @@ struct SemiNCAInfo {
const TreeNodePtr TN, const unsigned RootLevel,
const TreeNodePtr NCD, InsertionInfo &II) {
const unsigned NCDLevel = NCD->getLevel();
DEBUG(dbgs() << "Visiting " << BlockNamePrinter(TN) << "\n");
DEBUG(dbgs() << "Visiting " << BlockNamePrinter(TN) << ", RootLevel "
<< RootLevel << "\n");
SmallVector<TreeNodePtr, 8> Stack = {TN};
assert(TN->getBlock() && II.Visited.count(TN) && "Preconditions!");
SmallPtrSet<TreeNodePtr, 8> Processed;
do {
TreeNodePtr Next = Stack.pop_back_val();
DEBUG(dbgs() << " Next: " << BlockNamePrinter(Next) << "\n");
for (const NodePtr Succ :
ChildrenGetter<IsPostDom>::Get(Next->getBlock(), BUI)) {
@ -786,19 +792,31 @@ struct SemiNCAInfo {
assert(SuccTN && "Unreachable successor found at reachable insertion");
const unsigned SuccLevel = SuccTN->getLevel();
DEBUG(dbgs() << "\tSuccessor " << BlockNamePrinter(Succ)
<< ", level = " << SuccLevel << "\n");
DEBUG(dbgs() << "\tSuccessor " << BlockNamePrinter(Succ) << ", level = "
<< SuccLevel << "\n");
// Do not process the same node multiple times.
if (Processed.count(Next) > 0)
continue;
// Succ dominated by subtree From -- not affected.
// (Based on Lemma 2.5 from the second paper.)
if (SuccLevel > RootLevel) {
DEBUG(dbgs() << "\t\tDominated by subtree From\n");
if (II.Visited.count(SuccTN) != 0)
continue;
if (II.Visited.count(SuccTN) != 0) {
DEBUG(dbgs() << "\t\t\talready visited at level "
<< II.Visited[SuccTN] << "\n\t\t\tcurrent level "
<< RootLevel << ")\n");
// A node may need to be visited again if we see it again at
// a lower level than before.
if (II.Visited[SuccTN] >= RootLevel)
continue;
}
DEBUG(dbgs() << "\t\tMarking visited not affected "
<< BlockNamePrinter(Succ) << "\n");
II.Visited.insert(SuccTN);
II.Visited.insert({SuccTN, RootLevel});
II.VisitedNotAffectedQueue.push_back(SuccTN);
Stack.push_back(SuccTN);
} else if ((SuccLevel > NCDLevel + 1) &&
@ -809,6 +827,8 @@ struct SemiNCAInfo {
II.Bucket.push({SuccLevel, SuccTN});
}
}
Processed.insert(Next);
} while (!Stack.empty());
}
@ -920,21 +940,21 @@ struct SemiNCAInfo {
const NodePtr NCDBlock = DT.findNearestCommonDominator(From, To);
const TreeNodePtr NCD = DT.getNode(NCDBlock);
// To dominates From -- nothing to do.
if (ToTN == NCD) return;
// If To dominates From -- nothing to do.
if (ToTN != NCD) {
DT.DFSInfoValid = false;
DT.DFSInfoValid = false;
const TreeNodePtr ToIDom = ToTN->getIDom();
DEBUG(dbgs() << "\tNCD " << BlockNamePrinter(NCD) << ", ToIDom "
<< BlockNamePrinter(ToIDom) << "\n");
const TreeNodePtr ToIDom = ToTN->getIDom();
DEBUG(dbgs() << "\tNCD " << BlockNamePrinter(NCD) << ", ToIDom "
<< BlockNamePrinter(ToIDom) << "\n");
// To remains reachable after deletion.
// (Based on the caption under Figure 4. from the second paper.)
if (FromTN != ToIDom || HasProperSupport(DT, BUI, ToTN))
DeleteReachable(DT, BUI, FromTN, ToTN);
else
DeleteUnreachable(DT, BUI, ToTN);
// To remains reachable after deletion.
// (Based on the caption under Figure 4. from the second paper.)
if (FromTN != ToIDom || HasProperSupport(DT, BUI, ToTN))
DeleteReachable(DT, BUI, FromTN, ToTN);
else
DeleteUnreachable(DT, BUI, ToTN);
}
if (IsPostDom) UpdateRootsAfterUpdate(DT, BUI);
}

View File

@ -95,14 +95,9 @@ struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
bool tryToVectorizePair(Value *A, Value *B, slpvectorizer::BoUpSLP &R);
/// \brief Try to vectorize a list of operands.
/// \@param BuildVector A list of users to ignore for the purpose of
/// scheduling and cost estimation when NeedExtraction
/// is false.
/// \returns true if a value was vectorized.
bool tryToVectorizeList(ArrayRef<Value *> VL, slpvectorizer::BoUpSLP &R,
ArrayRef<Value *> BuildVector = None,
bool AllowReorder = false,
bool NeedExtraction = false);
bool AllowReorder = false);
/// \brief Try to vectorize a chain that may start at the operands of \p I.
bool tryToVectorize(Instruction *I, slpvectorizer::BoUpSLP &R);

View File

@ -2700,8 +2700,13 @@ class AddressingModeCombiner {
// we still need to collect it because the original value is different.
// And later we will need all original values as anchors during
// finding the common Phi node.
// We must also reject the case where the base offset differs and the
// scale reg is not null; we cannot handle this case, because the merge
// of the different offsets would have to be used as the ScaleReg.
if (DifferentField != ExtAddrMode::MultipleFields &&
DifferentField != ExtAddrMode::ScaleField) {
DifferentField != ExtAddrMode::ScaleField &&
(DifferentField != ExtAddrMode::BaseOffsField ||
!NewAddrMode.ScaledReg)) {
AddrModes.emplace_back(NewAddrMode);
return true;
}
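To make the new guard concrete, here is a minimal standalone sketch of the
same compatibility rule. The AddrMode and DiffField types below are
hypothetical stand-ins for LLVM's ExtAddrMode, not the real interface.

#include <cstdint>

struct AddrMode {
  int64_t BaseOffs = 0;
  bool HasScaledReg = false; // Stand-in for a non-null ScaledReg.
};
enum class DiffField { NoField, BaseOffs, Scale, Multiple };

// Mirrors the condition above: reject when multiple fields differ, when the
// scale differs, or (the newly rejected case) when the base offsets differ
// while a scaled register is present, since the merged offsets would have
// to be folded into the scale register.
bool isCompatibleForMerge(DiffField Different, const AddrMode &New) {
  if (Different == DiffField::Multiple || Different == DiffField::Scale)
    return false;
  if (Different == DiffField::BaseOffs && New.HasScaledReg)
    return false;
  return true;
}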

View File

@ -577,7 +577,8 @@ bool GlobalMerge::doInitialization(Module &M) {
for (auto &GV : M.globals()) {
// Merge is safe for "normal" internal or external globals only
if (GV.isDeclaration() || GV.isThreadLocal() ||
GV.hasSection() || GV.hasImplicitSection())
GV.hasSection() || GV.hasImplicitSection() ||
GV.hasDLLExportStorageClass())
continue;
// It's not safe to merge globals that may be preempted

View File

@ -719,15 +719,14 @@ bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg,
CurSrcPair = Pair;
ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI,
!DisableAdvCopyOpt, TII);
ValueTrackerResult Res;
bool ShouldRewrite = false;
do {
// Follow the chain of copies until we reach the top of the use-def chain
// or find a more suitable source.
Res = ValTracker.getNextSource();
// Follow the chain of copies until we find a more suitable source, a phi
// or have to abort.
while (true) {
ValueTrackerResult Res = ValTracker.getNextSource();
// Abort at the end of a chain (without finding a suitable source).
if (!Res.isValid())
break;
return false;
// Insert the Def -> Use entry for the recently found source.
ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
@ -763,24 +762,19 @@ bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg,
if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg))
return false;
// Keep following the chain if the value isn't any better yet.
const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
ShouldRewrite = TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC,
CurSrcPair.SubReg);
} while (!ShouldRewrite);
if (!TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC, CurSrcPair.SubReg))
continue;
// Continue looking for new sources...
if (Res.isValid())
continue;
// We currently cannot deal with subreg operands on PHI instructions
// (see insertPHI()).
if (PHICount > 0 && CurSrcPair.SubReg != 0)
continue;
// Do not continue searching for a new source if there's at least
// one use-def which cannot be rewritten.
if (!ShouldRewrite)
return false;
}
if (PHICount >= RewritePHILimit) {
DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
return false;
// We found a suitable source, and are done with this chain.
break;
}
}
// If we did not find a more suitable source, there is nothing to optimize.
@ -799,6 +793,9 @@ insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg);
// NewRC is only correct if no subregisters are involved. findNextSource()
// should have rejected those cases already.
assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
unsigned NewVR = MRI->createVirtualRegister(NewRC);
MachineBasicBlock *MBB = OrigPHI->getParent();
MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(),

View File

@ -3842,9 +3842,16 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
EVT ExtVT;
if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
// Only add this load if we can make it more narrow.
if (ExtVT.bitsLT(Load->getMemoryVT()))
// ZEXTLOAD is already small enough.
if (Load->getExtensionType() == ISD::ZEXTLOAD &&
ExtVT.bitsGE(Load->getMemoryVT()))
continue;
// Use LE to convert equal sized loads to zext.
if (ExtVT.bitsLE(Load->getMemoryVT()))
Loads.insert(Load);
continue;
}
return false;
@ -3899,11 +3906,13 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
if (Loads.size() == 0)
return false;
DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
SDValue MaskOp = N->getOperand(1);
// If it exists, fixup the single node we allow in the tree that needs
// masking.
if (FixupNode) {
DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
FixupNode->getValueType(0),
SDValue(FixupNode, 0), MaskOp);
@ -3914,14 +3923,21 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
// Narrow any constants that need it.
for (auto *LogicN : NodesWithConsts) {
auto *C = cast<ConstantSDNode>(LogicN->getOperand(1));
SDValue And = DAG.getNode(ISD::AND, SDLoc(C), C->getValueType(0),
SDValue(C, 0), MaskOp);
DAG.UpdateNodeOperands(LogicN, LogicN->getOperand(0), And);
SDValue Op0 = LogicN->getOperand(0);
SDValue Op1 = LogicN->getOperand(1);
if (isa<ConstantSDNode>(Op0))
std::swap(Op0, Op1);
SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
Op1, MaskOp);
DAG.UpdateNodeOperands(LogicN, Op0, And);
}
// Create narrow loads.
for (auto *Load : Loads) {
DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
SDValue(Load, 0), MaskOp);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
@ -5209,7 +5225,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
// Loads must share the same base address
BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
int64_t ByteOffsetFromBase = 0;
if (!Base)
Base = Ptr;
@ -12928,7 +12944,7 @@ void DAGCombiner::getStoreMergeCandidates(
StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
EVT MemVT = St->getMemoryVT();
SDValue Val = peekThroughBitcast(St->getValue());
@ -12949,7 +12965,7 @@ void DAGCombiner::getStoreMergeCandidates(
EVT LoadVT;
if (IsLoadSrc) {
auto *Ld = cast<LoadSDNode>(Val);
LBasePtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
LBasePtr = BaseIndexOffset::match(Ld, DAG);
LoadVT = Ld->getMemoryVT();
// Load and store should be the same type.
if (MemVT != LoadVT)
@ -12968,7 +12984,7 @@ void DAGCombiner::getStoreMergeCandidates(
return false;
// The Load's Base Ptr must also match
if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
if (LoadVT != OtherLd->getMemoryVT())
return false;
if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
@ -12992,7 +13008,7 @@ void DAGCombiner::getStoreMergeCandidates(
Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
}
Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
Ptr = BaseIndexOffset::match(Other, DAG);
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
};
@ -13365,7 +13381,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (Ld->getMemoryVT() != MemVT)
break;
BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
// If this is not the first ptr that we check.
int64_t LdOffset = 0;
if (LdBasePtr.getBase().getNode()) {
@ -17432,44 +17448,46 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
// Check for BaseIndexOffset matching.
BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
int64_t PtrDiff;
if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
// If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
// able to calculate their relative offset if at least one arises
// from an alloca. However, these allocas cannot overlap and we
// can infer there is no alias.
if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
// If the bases are the same frame index but we couldn't find a
// constant offset (the indices are different), be conservative.
if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
!MFI.isFixedObjectIndex(B->getIndex())))
return false;
}
// If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
// able to calculate their relative offset if at least one arises
// from an alloca. However, these allocas cannot overlap and we
// can infer there is no alias.
if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
// If the bases are the same frame index but we couldn't find a
// constant offset (the indices are different), be conservative.
if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
!MFI.isFixedObjectIndex(B->getIndex())))
return false;
}
bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
// If the bases are of mismatched types, or the indices are checkable,
// we can check that they do not alias.
if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
(IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
(IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
return false;
// If the bases are of mismatched types, or the indices are checkable,
// we can check that they do not alias.
if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
(IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
(IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
return false;
}
// If we know that SrcValue1 and SrcValue2 have relatively large alignment
// compared to the size and offset of the access, we may be able to prove they
// do not alias. This check is conservative for now to catch cases created by
// splitting vector types.
// If we know that SrcValue1 and SrcValue2 have relatively large
// alignment compared to the size and offset of the access, we may be able
// to prove they do not alias. This check is conservative for now to catch
// cases created by splitting vector types.
int64_t SrcValOffset0 = Op0->getSrcValueOffset();
int64_t SrcValOffset1 = Op1->getSrcValueOffset();
unsigned OrigAlignment0 = Op0->getOriginalAlignment();
@ -17479,8 +17497,8 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
// There is no overlap between these relatively aligned accesses of similar
// size. Return no alias.
// There is no overlap between these relatively aligned accesses of
// similar size. Return no alias.
if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
(OffAlign1 + NumBytes1) <= OffAlign0)
return false;
@ -17643,7 +17661,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
@ -17669,7 +17687,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
break;
// Find the base pointer and offset for this memory node.
BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);
// Check that the base pointer is the same as the original one.
if (!BasePtr.equalBaseIndex(Ptr, DAG))

View File

@ -2965,12 +2965,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::ZERO_EXTEND:
LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res,
DAG.getValueType(AtomicType));
RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
ExtRes = LHS;
break;
case ISD::ANY_EXTEND:
LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType);
RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
break;
default:
llvm_unreachable("Invalid atomic op extension");

View File

@ -7947,11 +7947,8 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
if (VT.getSizeInBits() / 8 != Bytes)
return false;
SDValue Loc = LD->getOperand(1);
SDValue BaseLoc = Base->getOperand(1);
auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this);
auto LocDecomp = BaseIndexOffset::match(Loc, *this);
auto BaseLocDecomp = BaseIndexOffset::match(Base, *this);
auto LocDecomp = BaseIndexOffset::match(LD, *this);
int64_t Offset = 0;
if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))

View File

@ -21,6 +21,9 @@ using namespace llvm;
bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
const SelectionDAG &DAG, int64_t &Off) {
// Conservatively fail if a match failed.
if (!Base.getNode() || !Other.Base.getNode())
return false;
// Initial Offset difference.
Off = Other.Offset - Offset;
@ -72,13 +75,29 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
}
/// Parses tree in Ptr for base, index, offset addresses.
BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N,
const SelectionDAG &DAG) {
SDValue Ptr = N->getBasePtr();
// (((B + I*M) + c)) + c ...
SDValue Base = DAG.getTargetLoweringInfo().unwrapAddress(Ptr);
SDValue Index = SDValue();
int64_t Offset = 0;
bool IsIndexSignExt = false;
// Pre-inc/pre-dec ops are components of the effective address (EA).
if (N->getAddressingMode() == ISD::PRE_INC) {
if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
Offset += C->getSExtValue();
else // If unknown, give up now.
return BaseIndexOffset(SDValue(), SDValue(), 0, false);
} else if (N->getAddressingMode() == ISD::PRE_DEC) {
if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
Offset -= C->getSExtValue();
else // If unknown, give up now.
return BaseIndexOffset(SDValue(), SDValue(), 0, false);
}
// Consume constant adds & ors with appropriate masking.
while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) {
if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
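The (((B + I*M) + c)) + c shape that match() peels apart can be illustrated
with a toy decomposition over a hypothetical expression tree (not
SelectionDAG): constant adds are consumed into the accumulated offset until
only the base remains.

#include <cstdint>

// Toy address expression: a leaf base, or Add(LHS, constant addend).
struct Expr {
  const Expr *LHS = nullptr; // Non-null only for constant adds.
  int64_t Const = 0;         // The constant addend.
};

// Peel constant adds off the top of the tree, accumulating the byte offset;
// this is the same shape as the while loop in match() above.
const Expr *matchBaseAndOffset(const Expr *E, int64_t &Offset) {
  while (E->LHS) {
    Offset += E->Const;
    E = E->LHS;
  }
  return E; // The remaining leaf is the base address.
}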

View File

@ -132,9 +132,18 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
// Darwin 10 and higher has an optimized __bzero.
if (!TT.isMacOSX() || !TT.isMacOSXVersionLT(10, 6) || TT.isArch64Bit()) {
setLibcallName(RTLIB::BZERO, TT.isAArch64() ? "bzero" : "__bzero");
// Some Darwin targets have an optimized __bzero/bzero function.
switch (TT.getArch()) {
case Triple::x86:
case Triple::x86_64:
if (TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 6))
setLibcallName(RTLIB::BZERO, "__bzero");
break;
case Triple::aarch64:
setLibcallName(RTLIB::BZERO, "bzero");
break;
default:
break;
}
if (darwinHasSinCos(TT)) {

View File

@ -954,7 +954,12 @@ Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV,
NewGV->setLinkage(GlobalValue::InternalLinkage);
Constant *C = NewGV;
if (DGV)
// Only create a bitcast if necessary. In particular, with
// DebugTypeODRUniquing we may reach metadata in the destination module
// containing a GV from the source module, in which case SGV will be
// the same as DGV and NewGV, and TypeMap.get() will assert since it
// assumes it is being invoked on a type in the source module.
if (DGV && NewGV != SGV)
C = ConstantExpr::getBitCast(NewGV, TypeMap.get(SGV->getType()));
if (DGV && NewGV != DGV) {

View File

@ -76,6 +76,14 @@ bool CodeViewContext::addFile(MCStreamer &OS, unsigned FileNumber,
return true;
}
MCCVFunctionInfo *CodeViewContext::getCVFunctionInfo(unsigned FuncId) {
if (FuncId >= Functions.size())
return nullptr;
if (Functions[FuncId].isUnallocatedFunctionInfo())
return nullptr;
return &Functions[FuncId];
}
bool CodeViewContext::recordFunctionId(unsigned FuncId) {
if (FuncId >= Functions.size())
Functions.resize(FuncId + 1);
@ -247,6 +255,67 @@ void CodeViewContext::emitFileChecksumOffset(MCObjectStreamer &OS,
OS.EmitValueImpl(SRE, 4);
}
void CodeViewContext::addLineEntry(const MCCVLineEntry &LineEntry) {
size_t Offset = MCCVLines.size();
auto I = MCCVLineStartStop.insert(
{LineEntry.getFunctionId(), {Offset, Offset + 1}});
if (!I.second)
I.first->second.second = Offset + 1;
MCCVLines.push_back(LineEntry);
}
std::vector<MCCVLineEntry>
CodeViewContext::getFunctionLineEntries(unsigned FuncId) {
std::vector<MCCVLineEntry> FilteredLines;
auto I = MCCVLineStartStop.find(FuncId);
if (I != MCCVLineStartStop.end()) {
MCCVFunctionInfo *SiteInfo = getCVFunctionInfo(FuncId);
for (size_t Idx = I->second.first, End = I->second.second; Idx != End;
++Idx) {
unsigned LocationFuncId = MCCVLines[Idx].getFunctionId();
if (LocationFuncId == FuncId) {
// This was a .cv_loc directly for FuncId, so record it.
FilteredLines.push_back(MCCVLines[Idx]);
} else {
// Check if the current location is inlined in this function. If it is,
// synthesize a statement .cv_loc at the original inlined call site.
auto I = SiteInfo->InlinedAtMap.find(LocationFuncId);
if (I != SiteInfo->InlinedAtMap.end()) {
MCCVFunctionInfo::LineInfo &IA = I->second;
// Only add the location if it differs from the previous location.
// Large inlined calls will have many .cv_loc entries and we only need
// one line table entry in the parent function.
if (FilteredLines.empty() ||
FilteredLines.back().getFileNum() != IA.File ||
FilteredLines.back().getLine() != IA.Line ||
FilteredLines.back().getColumn() != IA.Col) {
FilteredLines.push_back(MCCVLineEntry(
MCCVLines[Idx].getLabel(),
MCCVLoc(FuncId, IA.File, IA.Line, IA.Col, false, false)));
}
}
}
}
}
return FilteredLines;
}
std::pair<size_t, size_t> CodeViewContext::getLineExtent(unsigned FuncId) {
auto I = MCCVLineStartStop.find(FuncId);
// Return an empty extent if there are no cv_locs for this function id.
if (I == MCCVLineStartStop.end())
return {~0ULL, 0};
return I->second;
}
ArrayRef<MCCVLineEntry> CodeViewContext::getLinesForExtent(size_t L, size_t R) {
if (R <= L)
return None;
if (L >= MCCVLines.size())
return None;
return makeArrayRef(&MCCVLines[L], R - L);
}
void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS,
unsigned FuncId,
const MCSymbol *FuncBegin,

View File

@ -868,6 +868,40 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
if (OpFlags & AArch64II::MO_GOT) {
I.setDesc(TII.get(AArch64::LOADgot));
I.getOperand(1).setTargetFlags(OpFlags);
} else if (TM.getCodeModel() == CodeModel::Large) {
// Materialize the global using movz/movk instructions.
unsigned MovZDstReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
auto InsertPt = std::next(I.getIterator());
auto MovZ =
BuildMI(MBB, InsertPt, I.getDebugLoc(), TII.get(AArch64::MOVZXi))
.addDef(MovZDstReg);
MovZ->addOperand(MF, I.getOperand(1));
MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
AArch64II::MO_NC);
MovZ->addOperand(MF, MachineOperand::CreateImm(0));
constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags,
unsigned Offset, unsigned ForceDstReg) {
unsigned DstReg =
ForceDstReg ? ForceDstReg
: MRI.createVirtualRegister(&AArch64::GPR64RegClass);
auto MovI = BuildMI(MBB, InsertPt, MovZ->getDebugLoc(),
TII.get(AArch64::MOVKXi))
.addDef(DstReg)
.addReg(SrcReg);
MovI->addOperand(MF, MachineOperand::CreateGA(
GV, MovZ->getOperand(1).getOffset(), Flags));
MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
return DstReg;
};
unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(),
AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
I.eraseFromParent();
return true;
} else {
I.setDesc(TII.get(AArch64::MOVaddr));
I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
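The MOVZ/MOVK chain above assembles the 64-bit address 16 bits at a time.
As a plain-integer model of the same composition (a hypothetical helper,
not the selector code):

#include <cstdint>

// Compose a 64-bit value the way MOVZXi (group g0) followed by MOVKXi at
// shifts 16, 32 and 48 (groups g1..g3) does.
uint64_t materializeAddress(uint16_t G0, uint16_t G1, uint16_t G2,
                            uint16_t G3) {
  uint64_t V = G0;         // MOVZXi: zero the register, insert low 16 bits.
  V |= uint64_t(G1) << 16; // MOVKXi, shift 16: keep other bits, insert G1.
  V |= uint64_t(G2) << 32; // MOVKXi, shift 32.
  V |= uint64_t(G3) << 48; // MOVKXi, shift 48.
  return V;
}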

View File

@ -821,7 +821,6 @@ namespace llvm {
MutableArrayRef<int> NewMask, unsigned Options = None);
OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
MutableArrayRef<int> NewMask);
OpRef zerous(ShuffleMask SM, OpRef Va, ResultStack &Results);
OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
ResultStack &Results);
OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
@ -1139,25 +1138,6 @@ OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
return concat(Out[0], Out[1], Results);
}
OpRef HvxSelector::zerous(ShuffleMask SM, OpRef Va, ResultStack &Results) {
DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
int VecLen = SM.Mask.size();
SmallVector<uint8_t,128> UsedBytes(VecLen);
bool HasUnused = false;
for (int I = 0; I != VecLen; ++I) {
if (SM.Mask[I] != -1)
UsedBytes[I] = 0xFF;
else
HasUnused = true;
}
if (!HasUnused)
return Va;
SDValue B = getVectorConstant(UsedBytes, SDLoc(Results.InpNode));
Results.push(Hexagon::V6_vand, getSingleVT(MVT::i8), {Va, OpRef(B)});
return OpRef::res(Results.top());
}
OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
ResultStack &Results) {
DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});

View File

@ -142,6 +142,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
// Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
@ -1154,6 +1157,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
@ -8834,6 +8839,42 @@ SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
// compared to a value that is atomically loaded (atomic loads zero-extend).
SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
"Expecting an atomic compare-and-swap here.");
SDLoc dl(Op);
auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
EVT MemVT = AtomicNode->getMemoryVT();
if (MemVT.getSizeInBits() >= 32)
return Op;
SDValue CmpOp = Op.getOperand(2);
// If this is already correctly zero-extended, leave it alone.
auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
if (DAG.MaskedValueIsZero(CmpOp, HighBits))
return Op;
// Clear the high bits of the compare operand.
unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
SDValue NewCmpOp =
DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
DAG.getConstant(MaskVal, dl, MVT::i32));
// Replace the existing compare operand with the properly zero-extended one.
SmallVector<SDValue, 4> Ops;
for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
Ops.push_back(AtomicNode->getOperand(i));
Ops[2] = NewCmpOp;
MachineMemOperand *MMO = AtomicNode->getMemOperand();
SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
auto NodeTy =
(MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
}
SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@ -9325,6 +9366,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerREM(Op, DAG);
case ISD::BSWAP:
return LowerBSWAP(Op, DAG);
case ISD::ATOMIC_CMP_SWAP:
return LowerATOMIC_CMP_SWAP(Op, DAG);
}
}
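The rationale in LowerATOMIC_CMP_SWAP can be checked with ordinary integers:
sub-word atomic loads zero-extend, so a sign-extended compare operand would
never match. A minimal sketch in plain C++ (not the DAG lowering itself):

#include <cassert>
#include <cstdint>

int main() {
  int8_t Stored = -1;                  // Memory holds the byte 0xFF.
  uint32_t Loaded = uint8_t(Stored);   // Atomic sub-word load zero-extends:
                                       // 0x000000FF.
  int32_t CmpSext = Stored;            // Sign-extended operand: 0xFFFFFFFF.
  uint32_t CmpZext = CmpSext & 0xFF;   // Masked low bits, as the lowering
                                       // does with (1 << bits) - 1.
  assert(Loaded != uint32_t(CmpSext)); // The compare would spuriously fail.
  assert(Loaded == CmpZext);           // The zero-extended operand matches.
  return 0;
}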

View File

@ -430,6 +430,11 @@ namespace llvm {
/// The 4xf32 load used for v4i1 constants.
QVLFSb,
/// ATOMIC_CMP_SWAP - exactly the same as the target-independent nodes,
/// except they ensure that the compare input is zero-extended for
/// sub-word versions because the atomic loads zero-extend.
ATOMIC_CMP_SWAP_8, ATOMIC_CMP_SWAP_16,
/// GPRC = TOC_ENTRY GA, TOC
/// Loads the entry for GA from the TOC, where the TOC base is given by
/// the last operand.
@ -955,6 +960,7 @@ namespace llvm {
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;

View File

@ -257,6 +257,13 @@ def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
[SDNPHasChain, SDNPOptInGlue]>;
// PPC-specific atomic operations.
def PPCatomicCmpSwap_8 :
SDNode<"PPCISD::ATOMIC_CMP_SWAP_8", SDTAtomic3,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def PPCatomicCmpSwap_16 :
SDNode<"PPCISD::ATOMIC_CMP_SWAP_16", SDTAtomic3,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
@ -1710,6 +1717,11 @@ let usesCustomInserter = 1 in {
}
}
def : Pat<(PPCatomicCmpSwap_8 xoaddr:$ptr, i32:$old, i32:$new),
(ATOMIC_CMP_SWAP_I8 xoaddr:$ptr, i32:$old, i32:$new)>;
def : Pat<(PPCatomicCmpSwap_16 xoaddr:$ptr, i32:$old, i32:$new),
(ATOMIC_CMP_SWAP_I16 xoaddr:$ptr, i32:$old, i32:$new)>;
// Instructions to support atomic operations
let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),

View File

@ -2375,6 +2375,13 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
.Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
.Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
Flags |= Prefix;
if (getLexer().is(AsmToken::EndOfStatement)) {
// We don't have a real instruction with the given prefix;
// let's use the prefix as the instruction.
// TODO: there could be several prefixes one after another
Flags = X86::IP_NO_PREFIX;
break;
}
Name = Parser.getTok().getString();
Parser.Lex(); // eat the prefix
// Hack: we could have something like "rep # some comment" or

View File

@ -7893,8 +7893,14 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
IndicesVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
VT.getVectorNumElements());
IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT);
return DAG.getNode(VT == MVT::v16i8 ? X86ISD::PSHUFB : X86ISD::VPERMV,
SDLoc(V), VT, IndicesVec, SrcVec);
if (SrcVec.getValueSizeInBits() < IndicesVT.getSizeInBits()) {
SrcVec =
DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(SrcVec), VT, DAG.getUNDEF(VT),
SrcVec, DAG.getIntPtrConstant(0, SDLoc(SrcVec)));
}
if (VT == MVT::v16i8)
return DAG.getNode(X86ISD::PSHUFB, SDLoc(V), VT, SrcVec, IndicesVec);
return DAG.getNode(X86ISD::VPERMV, SDLoc(V), VT, IndicesVec, SrcVec);
}
SDValue
@ -18262,6 +18268,18 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
}
// For v64i1 without 64-bit support we need to split and rejoin.
if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
assert(Subtarget.hasBWI() && "Expected BWI to be legal");
SDValue Op1Lo = extractSubVector(Op1, 0, DAG, DL, 32);
SDValue Op2Lo = extractSubVector(Op2, 0, DAG, DL, 32);
SDValue Op1Hi = extractSubVector(Op1, 32, DAG, DL, 32);
SDValue Op2Hi = extractSubVector(Op2, 32, DAG, DL, 32);
SDValue Lo = DAG.getSelect(DL, MVT::v32i1, Cond, Op1Lo, Op2Lo);
SDValue Hi = DAG.getSelect(DL, MVT::v32i1, Cond, Op1Hi, Op2Hi);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
}
if (VT.isVector() && VT.getVectorElementType() == MVT::i1) {
SDValue Op1Scalar;
if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode()))
@ -28652,13 +28670,14 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
SDValue NewV1 = V1; // Save operand in case early exit happens.
if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
ShuffleVT) &&
NewV1, DL, DAG, Subtarget, Shuffle,
ShuffleSrcVT, ShuffleVT) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
Res = DAG.getBitcast(ShuffleSrcVT, V1);
Res = DAG.getBitcast(ShuffleSrcVT, NewV1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
DCI.AddToWorklist(Res.getNode());
@ -28680,33 +28699,36 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
SDValue NewV1 = V1; // Save operands in case early exit happens.
SDValue NewV2 = V2;
if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
V1, V2, DL, DAG, Subtarget, Shuffle,
NewV1, NewV2, DL, DAG, Subtarget, Shuffle,
ShuffleSrcVT, ShuffleVT, UnaryShuffle) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
V1 = DAG.getBitcast(ShuffleSrcVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(ShuffleSrcVT, V2);
DCI.AddToWorklist(V2.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2);
NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1);
DCI.AddToWorklist(NewV1.getNode());
NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2);
DCI.AddToWorklist(NewV2.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2);
DCI.AddToWorklist(Res.getNode());
return DAG.getBitcast(RootVT, Res);
}
if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
AllowIntDomain, V1, V2, DL, DAG,
Subtarget, Shuffle, ShuffleVT,
PermuteImm) &&
NewV1 = V1; // Save operands in case early exit happens.
NewV2 = V2;
if (matchBinaryPermuteVectorShuffle(
MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1,
NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
V1 = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(ShuffleVT, V2);
DCI.AddToWorklist(V2.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2,
NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
DCI.AddToWorklist(NewV1.getNode());
NewV2 = DAG.getBitcast(ShuffleVT, NewV2);
DCI.AddToWorklist(NewV2.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2,
DAG.getConstant(PermuteImm, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
return DAG.getBitcast(RootVT, Res);

View File

@ -754,7 +754,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
// type remains the same.
if (Kind == TTI::SK_PermuteSingleSrc && LT.first != 1) {
MVT LegalVT = LT.second;
if (LegalVT.getVectorElementType().getSizeInBits() ==
if (LegalVT.isVector() &&
LegalVT.getVectorElementType().getSizeInBits() ==
Tp->getVectorElementType()->getPrimitiveSizeInBits() &&
LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) {

View File

@ -648,7 +648,7 @@ class GVNHoist {
// track in a CHI. In the PDom walk, there can be values in the
// stack which are not control dependent, e.g., in a nested loop.
if (si != RenameStack.end() && si->second.size() &&
DT->dominates(Pred, si->second.back()->getParent())) {
DT->properlyDominates(Pred, si->second.back()->getParent())) {
C.Dest = BB; // Assign the edge
C.I = si->second.pop_back_val(); // Assign the argument
DEBUG(dbgs() << "\nCHI Inserted in BB: " << C.Dest->getName()

View File

@ -14,7 +14,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/RegionPass.h"
@ -177,9 +176,8 @@ class StructurizeCFG : public RegionPass {
Region *ParentRegion;
DominatorTree *DT;
LoopInfo *LI;
SmallVector<RegionNode *, 8> Order;
std::deque<RegionNode *> Order;
BBSet Visited;
BBPhiMap DeletedPhis;
@ -204,7 +202,7 @@ class StructurizeCFG : public RegionPass {
void gatherPredicates(RegionNode *N);
void collectInfos();
void analyzeNode(RegionNode *N);
void insertConditions(bool Loops);
@ -258,7 +256,6 @@ class StructurizeCFG : public RegionPass {
AU.addRequired<DivergenceAnalysis>();
AU.addRequiredID(LowerSwitchID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
RegionPass::getAnalysisUsage(AU);
@ -292,55 +289,17 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
/// \brief Build up the general order of nodes
void StructurizeCFG::orderNodes() {
ReversePostOrderTraversal<Region*> RPOT(ParentRegion);
SmallDenseMap<Loop*, unsigned, 8> LoopBlocks;
assert(Visited.empty());
assert(Predicates.empty());
assert(Loops.empty());
assert(LoopPreds.empty());
// The reverse post-order traversal of the list gives us an ordering close
// to what we want. The only problem with it is that sometimes backedges
// for outer loops will be visited before backedges for inner loops.
for (RegionNode *RN : RPOT) {
BasicBlock *BB = RN->getEntry();
Loop *Loop = LI->getLoopFor(BB);
++LoopBlocks[Loop];
// This must be in RPO order for the back edge detection to work
for (RegionNode *RN : ReversePostOrderTraversal<Region*>(ParentRegion)) {
// FIXME: Is there a better order to use for structurization?
Order.push_back(RN);
analyzeNode(RN);
}
unsigned CurrentLoopDepth = 0;
Loop *CurrentLoop = nullptr;
for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
BasicBlock *BB = (*I)->getEntry();
unsigned LoopDepth = LI->getLoopDepth(BB);
if (is_contained(Order, *I))
continue;
if (LoopDepth < CurrentLoopDepth) {
// Make sure we have visited all blocks in this loop before moving back to
// the outer loop.
auto LoopI = I;
while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) {
LoopI++;
BasicBlock *LoopBB = (*LoopI)->getEntry();
if (LI->getLoopFor(LoopBB) == CurrentLoop) {
--BlockCount;
Order.push_back(*LoopI);
}
}
}
CurrentLoop = LI->getLoopFor(BB);
if (CurrentLoop)
LoopBlocks[CurrentLoop]--;
CurrentLoopDepth = LoopDepth;
Order.push_back(*I);
}
// This pass originally used a post-order traversal and then operated on
// the list in reverse. Now that we are using a reverse post-order traversal
// rather than re-working the whole pass to operate on the list in order,
// we just reverse the list and continue to operate on it in reverse.
std::reverse(Order.begin(), Order.end());
}
/// \brief Determine the end of the loops
@ -466,32 +425,19 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
}
/// \brief Collect various loop and predicate infos
void StructurizeCFG::collectInfos() {
// Reset predicate
Predicates.clear();
void StructurizeCFG::analyzeNode(RegionNode *RN) {
DEBUG(dbgs() << "Visiting: "
<< (RN->isSubRegion() ? "SubRegion with entry: " : "")
<< RN->getEntry()->getName() << '\n');
// and loop infos
Loops.clear();
LoopPreds.clear();
// Analyze all the conditions leading to a node
gatherPredicates(RN);
// Reset the visited nodes
Visited.clear();
// Remember that we've seen this node
Visited.insert(RN->getEntry());
for (RegionNode *RN : reverse(Order)) {
DEBUG(dbgs() << "Visiting: "
<< (RN->isSubRegion() ? "SubRegion with entry: " : "")
<< RN->getEntry()->getName() << " Loop Depth: "
<< LI->getLoopDepth(RN->getEntry()) << "\n");
// Analyze all the conditions leading to a node
gatherPredicates(RN);
// Remember that we've seen this node
Visited.insert(RN->getEntry());
// Find the last back edges
analyzeLoops(RN);
}
// Find the last back edges
analyzeLoops(RN);
}
/// \brief Insert the missing branch conditions
@ -664,7 +610,7 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
LLVMContext &Context = Func->getContext();
BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
Order.back()->getEntry();
Order.front()->getEntry();
BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
Func, Insert);
DT->addNewBlock(Flow, Dominator);
@ -744,7 +690,8 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
/// Take one node from the order vector and wire it up
void StructurizeCFG::wireFlow(bool ExitUseAllowed,
BasicBlock *LoopEnd) {
RegionNode *Node = Order.pop_back_val();
RegionNode *Node = Order.front();
Order.pop_front();
Visited.insert(Node->getEntry());
if (isPredictableTrue(Node)) {
@ -768,7 +715,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed,
PrevNode = Node;
while (!Order.empty() && !Visited.count(LoopEnd) &&
dominatesPredicates(Entry, Order.back())) {
dominatesPredicates(Entry, Order.front())) {
handleLoops(false, LoopEnd);
}
@ -779,7 +726,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed,
void StructurizeCFG::handleLoops(bool ExitUseAllowed,
BasicBlock *LoopEnd) {
RegionNode *Node = Order.back();
RegionNode *Node = Order.front();
BasicBlock *LoopStart = Node->getEntry();
if (!Loops.count(LoopStart)) {
@ -924,10 +871,9 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
ParentRegion = R;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
orderNodes();
collectInfos();
createFlow();
insertConditions(false);
insertConditions(true);

View File

@ -2630,9 +2630,12 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
Instruction *LastInduction = VecInd;
for (unsigned Part = 0; Part < UF; ++Part) {
VectorLoopValueMap.setVectorValue(EntryVal, Part, LastInduction);
recordVectorLoopValueForInductionCast(II, LastInduction, Part);
if (isa<TruncInst>(EntryVal))
addMetadata(LastInduction, EntryVal);
else
recordVectorLoopValueForInductionCast(II, LastInduction, Part);
LastInduction = cast<Instruction>(addFastMathFlag(
Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")));
}
@ -2754,15 +2757,17 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
// If we haven't yet vectorized the induction variable, splat the scalar
// induction variable, and build the necessary step vectors.
// TODO: Don't do it unless the vectorized IV is really required.
if (!VectorizedIV) {
Value *Broadcasted = getBroadcastInstrs(ScalarIV);
for (unsigned Part = 0; Part < UF; ++Part) {
Value *EntryPart =
getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode());
VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart);
recordVectorLoopValueForInductionCast(ID, EntryPart, Part);
if (Trunc)
addMetadata(EntryPart, Trunc);
else
recordVectorLoopValueForInductionCast(ID, EntryPart, Part);
}
}

View File

@ -1347,7 +1347,6 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane " <<
Lane << " from " << *Scalar << ".\n");
ExternalUses.emplace_back(Scalar, nullptr, Lane);
continue;
}
for (User *U : Scalar->users()) {
DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
@ -4417,13 +4416,11 @@ bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
if (!A || !B)
return false;
Value *VL[] = { A, B };
return tryToVectorizeList(VL, R, None, true);
return tryToVectorizeList(VL, R, true);
}
bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
ArrayRef<Value *> BuildVector,
bool AllowReorder,
bool NeedExtraction) {
bool AllowReorder) {
if (VL.size() < 2)
return false;
@ -4517,12 +4514,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
<< "\n");
ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
ArrayRef<Value *> EmptyArray;
ArrayRef<Value *> BuildVectorSlice;
if (!BuildVector.empty())
BuildVectorSlice = BuildVector.slice(I, OpsWidth);
R.buildTree(Ops, NeedExtraction ? EmptyArray : BuildVectorSlice);
R.buildTree(Ops);
// TODO: check if we can allow reordering for more cases.
if (AllowReorder && R.shouldReorder()) {
// Conceptually, there is nothing actually preventing us from trying to
@ -4530,7 +4522,6 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
// reductions. However, at this point, we only expect to get here when
// there are exactly two operations.
assert(Ops.size() == 2);
assert(BuildVectorSlice.empty());
Value *ReorderedOps[] = {Ops[1], Ops[0]};
R.buildTree(ReorderedOps, None);
}
@ -4550,31 +4541,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
<< " and with tree size "
<< ore::NV("TreeSize", R.getTreeSize()));
Value *VectorizedRoot = R.vectorizeTree();
// Reconstruct the build vector by extracting the vectorized root. This
// way we handle the case where some elements of the vector are
// undefined.
// (return (insertelt <4 x i32> (insertelt undef (opd0) 0) (opd1) 2))
if (!BuildVectorSlice.empty()) {
// The insert point is the last build vector instruction. The
// vectorized root will precede it. This guarantees that we get an
// instruction. The vectorized tree could have been constant folded.
Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
unsigned VecIdx = 0;
for (auto &V : BuildVectorSlice) {
IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
++BasicBlock::iterator(InsertAfter));
Instruction *I = cast<Instruction>(V);
assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
Instruction *Extract =
cast<Instruction>(Builder.CreateExtractElement(
VectorizedRoot, Builder.getInt32(VecIdx++)));
I->setOperand(1, Extract);
I->moveAfter(Extract);
InsertAfter = I;
}
}
R.vectorizeTree();
// Move to the next bundle.
I += VF - 1;
NextInst = I + 1;
@ -5495,11 +5462,9 @@ class HorizontalReduction {
///
/// Returns true if it matches
static bool findBuildVector(InsertElementInst *LastInsertElem,
SmallVectorImpl<Value *> &BuildVector,
SmallVectorImpl<Value *> &BuildVectorOpds) {
Value *V = nullptr;
do {
BuildVector.push_back(LastInsertElem);
BuildVectorOpds.push_back(LastInsertElem->getOperand(1));
V = LastInsertElem->getOperand(0);
if (isa<UndefValue>(V))
@ -5508,7 +5473,6 @@ static bool findBuildVector(InsertElementInst *LastInsertElem,
if (!LastInsertElem || !LastInsertElem->hasOneUse())
return false;
} while (true);
std::reverse(BuildVector.begin(), BuildVector.end());
std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
return true;
}
@ -5517,11 +5481,9 @@ static bool findBuildVector(InsertElementInst *LastInsertElem,
///
/// \return true if it matches.
static bool findBuildAggregate(InsertValueInst *IV,
SmallVectorImpl<Value *> &BuildVector,
SmallVectorImpl<Value *> &BuildVectorOpds) {
Value *V;
do {
BuildVector.push_back(IV);
BuildVectorOpds.push_back(IV->getInsertedValueOperand());
V = IV->getAggregateOperand();
if (isa<UndefValue>(V))
@ -5530,7 +5492,6 @@ static bool findBuildAggregate(InsertValueInst *IV,
if (!IV || !IV->hasOneUse())
return false;
} while (true);
std::reverse(BuildVector.begin(), BuildVector.end());
std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
return true;
}
@ -5706,27 +5667,25 @@ bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
if (!R.canMapToVector(IVI->getType(), DL))
return false;
SmallVector<Value *, 16> BuildVector;
SmallVector<Value *, 16> BuildVectorOpds;
if (!findBuildAggregate(IVI, BuildVector, BuildVectorOpds))
if (!findBuildAggregate(IVI, BuildVectorOpds))
return false;
DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
// An aggregate value is unlikely to be processed in a vector register; we need
// to extract the scalars into scalar registers, so NeedExtraction is set to true.
return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false, true);
return tryToVectorizeList(BuildVectorOpds, R);
}
bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
BasicBlock *BB, BoUpSLP &R) {
SmallVector<Value *, 16> BuildVector;
SmallVector<Value *, 16> BuildVectorOpds;
if (!findBuildVector(IEI, BuildVector, BuildVectorOpds))
if (!findBuildVector(IEI, BuildVectorOpds))
return false;
// Vectorize starting with the build vector operands, ignoring the BuildVector
// instructions for the purposes of scheduling and user extraction.
return tryToVectorizeList(BuildVectorOpds, R, BuildVector);
return tryToVectorizeList(BuildVectorOpds, R);
}
bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB,
@ -5804,8 +5763,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// is done when there are exactly two elements since tryToVectorizeList
// asserts that there are only two values when AllowReorder is true.
bool AllowReorder = NumElts == 2;
if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R,
None, AllowReorder)) {
if (NumElts > 1 &&
tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, AllowReorder)) {
// Success. Start over, because instructions might have been changed.
HaveVectorizedPhiNodes = true;
Changed = true;
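
A reader's note on the findBuildVector/findBuildAggregate hunks above: after
this change only the inserted scalars are collected, not the insertelement
instructions themselves. A self-contained sketch of the chain walk (hedged: it
mirrors the shape of the new code, not the in-tree function verbatim):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include <algorithm>

// Walk an insertelement chain bottom-up, collecting the scalar operands;
// the chain must start at undef and every link must be single-use.
static bool collectBuildVectorOpds(llvm::InsertElementInst *Last,
                                   llvm::SmallVectorImpl<llvm::Value *> &Opds) {
  do {
    Opds.push_back(Last->getOperand(1));    // the scalar being inserted
    llvm::Value *V = Last->getOperand(0);   // the vector being built upon
    if (llvm::isa<llvm::UndefValue>(V))
      break;                                // reached the head of the chain
    Last = llvm::dyn_cast<llvm::InsertElementInst>(V);
    if (!Last || !Last->hasOneUse())
      return false;                         // not a simple single-use chain
  } while (true);
  std::reverse(Opds.begin(), Opds.end());   // put operands in lane order
  return true;
}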

View File

@ -0,0 +1,61 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-linux-gnu -code-model=large -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s
--- |
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@foo1 = common global [1073741824 x i32] zeroinitializer, align 4
@foo2 = common global [1073741824 x i32] zeroinitializer, align 4
define i32 @gv_large() {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval, align 4
%0 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0), align 4
%1 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0), align 4
%add = add nsw i32 %0, %1
ret i32 %add
}
...
---
name: gv_large
legalized: true
regBankSelected: true
stack:
- { id: 0, name: retval, type: default, offset: 0, size: 4, alignment: 4,
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
di-variable: '', di-expression: '', di-location: '' }
constants:
body: |
bb.1:
; CHECK-LABEL: name: gv_large
; CHECK: [[MOVZXi:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @foo1, 0
; CHECK: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi]], target-flags(aarch64-g1, aarch64-nc) @foo1, 16
; CHECK: [[MOVKXi1:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi]], target-flags(aarch64-g2, aarch64-nc) @foo1, 32
; CHECK: [[MOVKXi2:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi1]], target-flags(aarch64-g3) @foo1, 48
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY [[MOVKXi2]]
; CHECK: [[MOVZXi1:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @foo2, 0
; CHECK: [[MOVKXi3:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi1]], target-flags(aarch64-g1, aarch64-nc) @foo2, 16
; CHECK: [[MOVKXi4:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi3]], target-flags(aarch64-g2, aarch64-nc) @foo2, 32
; CHECK: [[MOVKXi5:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi4]], target-flags(aarch64-g3) @foo2, 48
; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[MOVKXi5]]
; CHECK: STRWui %wzr, %stack.0.retval, 0 :: (store 4 into %ir.retval)
; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`)
; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`)
; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRWui]], [[LDRWui1]]
; CHECK: %w0 = COPY [[ADDWrr]]
; CHECK: RET_ReallyLR implicit %w0
%1:gpr(s32) = G_CONSTANT i32 0
%4:gpr(p0) = G_GLOBAL_VALUE @foo1
%3:gpr(p0) = COPY %4(p0)
%7:gpr(p0) = G_GLOBAL_VALUE @foo2
%6:gpr(p0) = COPY %7(p0)
%0:gpr(p0) = G_FRAME_INDEX %stack.0.retval
G_STORE %1(s32), %0(p0) :: (store 4 into %ir.retval)
%2:gpr(s32) = G_LOAD %3(p0) :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`)
%5:gpr(s32) = G_LOAD %6(p0) :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`)
%8:gpr(s32) = G_ADD %2, %5
%w0 = COPY %8(s32)
RET_ReallyLR implicit %w0
...
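
The MOVZXi/MOVKXi ladder checked above assembles a 64-bit address sixteen bits
at a time, as the large code model requires. A scalar restatement (g0..g3
stand for the four halfword relocations of the symbol's address):

#include <cstdint>

// addr = g0 | g1 << 16 | g2 << 32 | g3 << 48, matching the CHECK lines.
uint64_t materializeLargeAddress(uint16_t g0, uint16_t g1,
                                 uint16_t g2, uint16_t g3) {
  uint64_t Addr = g0;             // MOVZXi ..., 0
  Addr |= uint64_t(g1) << 16;     // MOVKXi ..., 16
  Addr |= uint64_t(g2) << 32;     // MOVKXi ..., 32
  Addr |= uint64_t(g3) << 48;     // MOVKXi ..., 48
  return Addr;
}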

View File

@ -629,14 +629,29 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
; CHECK: casab w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]]
; CHECK-NOT: dmb
; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
; CHECK-NEXT: casab w0, w1, [x[[ADDR]]]
; CHECK-NEXT: ret
ret i8 %old
}
define i1 @test_atomic_cmpxchg_i8_1(i8 %wanted, i8 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i8_1:
%pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
%success = extractvalue { i8, i1 } %pair, 1
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
; CHECK: casab w[[NEW:[0-9]+]], w1, [x[[ADDR]]]
; CHECK-NEXT: cmp w[[NEW]], w0, uxtb
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
ret i1 %success
}
define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i16:
%pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire
@ -644,14 +659,30 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
; CHECK: casah w0, w1, [x[[ADDR]]]
; CHECK-NOT: dmb
; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
; CHECK-NEXT: casah w0, w1, [x[[ADDR]]]
; CHECK-NEXT: ret
ret i16 %old
}
define i1 @test_atomic_cmpxchg_i16_1(i16 %wanted, i16 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i16_1:
%pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire
%success = extractvalue { i16, i1 } %pair, 1
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
; CHECK: casah w[[NEW:[0-9]+]], w1, [x[[ADDR]]]
; CHECK-NEXT: cmp w[[NEW]], w0, uxth
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
ret i1 %success
}
define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i32:
%pair = cmpxchg i32* @var32, i32 %wanted, i32 %new acquire acquire

View File

@ -66,9 +66,10 @@ ENDIF: ; preds = %LOOP
; OPT-LABEL: define amdgpu_kernel void @multi_if_break_loop(
; OPT: llvm.amdgcn.break
; OPT: llvm.amdgcn.break
; OPT: llvm.amdgcn.if.break
; OPT: llvm.amdgcn.if.break
; OPT: llvm.amdgcn.loop
; OPT: llvm.amdgcn.if.break
; OPT: llvm.amdgcn.if.break
; OPT: llvm.amdgcn.end.cf
; GCN-LABEL: {{^}}multi_if_break_loop:

View File

@ -124,55 +124,100 @@ bb23: ; preds = %bb10
; Earlier version of the above, before a run of the structurizer.
; IR-LABEL: @nested_loop_conditions(
; IR: Flow7:
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %17)
; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %15)
; IR-NEXT: %1 = extractvalue { i1, i64 } %0, 0
; IR-NEXT: %2 = extractvalue { i1, i64 } %0, 1
; IR-NEXT: br i1 %1, label %bb4.bb13_crit_edge, label %Flow8
; IR: %tmp1235 = icmp slt i32 %tmp1134, 9
; IR: br i1 %tmp1235, label %bb14.lr.ph, label %Flow
; IR: Flow1:
; IR-NEXT: %loop.phi = phi i64 [ %loop.phi9, %Flow6 ], [ %phi.broken, %bb14 ]
; IR-NEXT: %13 = phi <4 x i32> [ %29, %Flow6 ], [ undef, %bb14 ]
; IR-NEXT: %14 = phi i32 [ %30, %Flow6 ], [ undef, %bb14 ]
; IR-NEXT: %15 = phi i1 [ %31, %Flow6 ], [ false, %bb14 ]
; IR-NEXT: %16 = phi i1 [ false, %Flow6 ], [ %8, %bb14 ]
; IR-NEXT: %17 = call i64 @llvm.amdgcn.else.break(i64 %11, i64 %loop.phi)
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11)
; IR-NEXT: %18 = call i1 @llvm.amdgcn.loop(i64 %17)
; IR-NEXT: br i1 %18, label %Flow7, label %bb14
; IR: Flow2:
; IR-NEXT: %loop.phi10 = phi i64 [ %loop.phi11, %Flow5 ], [ %12, %bb16 ]
; IR-NEXT: %19 = phi <4 x i32> [ %29, %Flow5 ], [ undef, %bb16 ]
; IR-NEXT: %20 = phi i32 [ %30, %Flow5 ], [ undef, %bb16 ]
; IR-NEXT: %21 = phi i1 [ %31, %Flow5 ], [ false, %bb16 ]
; IR-NEXT: %22 = phi i1 [ false, %Flow5 ], [ false, %bb16 ]
; IR-NEXT: %23 = phi i1 [ false, %Flow5 ], [ %8, %bb16 ]
; IR-NEXT: %24 = call { i1, i64 } @llvm.amdgcn.if(i1 %23)
; IR-NEXT: %25 = extractvalue { i1, i64 } %24, 0
; IR-NEXT: %26 = extractvalue { i1, i64 } %24, 1
; IR-NEXT: br i1 %25, label %bb21, label %Flow3
; IR: bb21:
; IR: %tmp12 = icmp slt i32 %tmp11, 9
; IR-NEXT: %27 = xor i1 %tmp12, true
; IR-NEXT: %28 = call i64 @llvm.amdgcn.if.break(i1 %27, i64 %phi.broken)
; IR-NEXT: br label %Flow3
; IR: bb14.lr.ph:
; IR: br label %bb14
; IR: Flow3:
; IR-NEXT: %loop.phi11 = phi i64 [ %phi.broken, %bb21 ], [ %phi.broken, %Flow2 ]
; IR-NEXT: %loop.phi9 = phi i64 [ %28, %bb21 ], [ %loop.phi10, %Flow2 ]
; IR-NEXT: %29 = phi <4 x i32> [ %tmp9, %bb21 ], [ %19, %Flow2 ]
; IR-NEXT: %30 = phi i32 [ %tmp10, %bb21 ], [ %20, %Flow2 ]
; IR-NEXT: %31 = phi i1 [ %27, %bb21 ], [ %21, %Flow2 ]
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %26)
; IR-NEXT: br i1 %22, label %bb31.loopexit, label %Flow4
; IR: call void @llvm.amdgcn.end.cf(i64 %18)
; IR: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %17)
; IR: %1 = extractvalue { i1, i64 } %0, 0
; IR: %2 = extractvalue { i1, i64 } %0, 1
; IR: br i1 %1, label %bb4.bb13_crit_edge, label %Flow4
; IR: bb4.bb13_crit_edge:
; IR: br label %Flow4
; IR: Flow4:
; IR: %3 = phi i1 [ true, %bb4.bb13_crit_edge ], [ false, %Flow3 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %2)
; IR: br label %Flow
; IR: bb13:
; IR: br label %bb31
; IR: Flow:
; IR: %4 = phi i1 [ %3, %Flow4 ], [ true, %bb ]
; IR: %5 = call { i1, i64 } @llvm.amdgcn.if(i1 %4)
; IR: %6 = extractvalue { i1, i64 } %5, 0
; IR: %7 = extractvalue { i1, i64 } %5, 1
; IR: br i1 %6, label %bb13, label %bb31
; IR: bb14:
; IR: %phi.broken = phi i64 [ %18, %Flow2 ], [ 0, %bb14.lr.ph ]
; IR: %tmp1037 = phi i32 [ %tmp1033, %bb14.lr.ph ], [ %16, %Flow2 ]
; IR: %tmp936 = phi <4 x i32> [ %tmp932, %bb14.lr.ph ], [ %15, %Flow2 ]
; IR: %tmp15 = icmp eq i32 %tmp1037, 1
; IR: %8 = xor i1 %tmp15, true
; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8)
; IR: %10 = extractvalue { i1, i64 } %9, 0
; IR: %11 = extractvalue { i1, i64 } %9, 1
; IR: br i1 %10, label %bb31.loopexit, label %Flow1
; IR: Flow1:
; IR: %12 = call { i1, i64 } @llvm.amdgcn.else(i64 %11)
; IR: %13 = extractvalue { i1, i64 } %12, 0
; IR: %14 = extractvalue { i1, i64 } %12, 1
; IR: br i1 %13, label %bb16, label %Flow2
; IR: bb16:
; IR: %tmp17 = bitcast i64 %tmp3 to <2 x i32>
; IR: br label %bb18
; IR: Flow2:
; IR: %loop.phi = phi i64 [ %21, %bb21 ], [ %phi.broken, %Flow1 ]
; IR: %15 = phi <4 x i32> [ %tmp9, %bb21 ], [ undef, %Flow1 ]
; IR: %16 = phi i32 [ %tmp10, %bb21 ], [ undef, %Flow1 ]
; IR: %17 = phi i1 [ %20, %bb21 ], [ false, %Flow1 ]
; IR: %18 = call i64 @llvm.amdgcn.else.break(i64 %14, i64 %loop.phi)
; IR: call void @llvm.amdgcn.end.cf(i64 %14)
; IR: %19 = call i1 @llvm.amdgcn.loop(i64 %18)
; IR: br i1 %19, label %Flow3, label %bb14
; IR: bb18:
; IR: %tmp19 = load volatile i32, i32 addrspace(1)* undef
; IR: %tmp20 = icmp slt i32 %tmp19, 9
; IR: br i1 %tmp20, label %bb21, label %bb18
; IR: bb21:
; IR: %tmp22 = extractelement <2 x i32> %tmp17, i64 1
; IR: %tmp23 = lshr i32 %tmp22, 16
; IR: %tmp24 = select i1 undef, i32 undef, i32 %tmp23
; IR: %tmp25 = uitofp i32 %tmp24 to float
; IR: %tmp26 = fmul float %tmp25, 0x3EF0001000000000
; IR: %tmp27 = fsub float %tmp26, undef
; IR: %tmp28 = fcmp olt float %tmp27, 5.000000e-01
; IR: %tmp29 = select i1 %tmp28, i64 1, i64 2
; IR: %tmp30 = extractelement <4 x i32> %tmp936, i64 %tmp29
; IR: %tmp7 = zext i32 %tmp30 to i64
; IR: %tmp8 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* undef, i64 %tmp7
; IR: %tmp9 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp8, align 16
; IR: %tmp10 = extractelement <4 x i32> %tmp9, i64 0
; IR: %tmp11 = load volatile i32, i32 addrspace(1)* undef
; IR: %tmp12 = icmp slt i32 %tmp11, 9
; IR: %20 = xor i1 %tmp12, true
; IR: %21 = call i64 @llvm.amdgcn.if.break(i1 %20, i64 %phi.broken)
; IR: br label %Flow2
; IR: bb31.loopexit:
; IR: br label %Flow1
; IR: bb31:
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %7)
; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef
; IR-NEXT: ret void
; IR: call void @llvm.amdgcn.end.cf(i64 %7)
; IR: store volatile i32 0, i32 addrspace(1)* undef
; IR: ret void
; GCN-LABEL: {{^}}nested_loop_conditions:

View File

@ -852,8 +852,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
; ARM: @ %bb.0: @ %entry
; ARM-NEXT: ldrb r0, [r0]
; ARM-NEXT: uxtb r2, r2
; ARM-NEXT: and r0, r0, r1
; ARM-NEXT: uxtb r1, r0
; ARM-NEXT: and r1, r0, r1
; ARM-NEXT: mov r0, #0
; ARM-NEXT: cmp r1, r2
; ARM-NEXT: movweq r0, #1
@ -863,8 +862,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
; ARMEB: @ %bb.0: @ %entry
; ARMEB-NEXT: ldrb r0, [r0]
; ARMEB-NEXT: uxtb r2, r2
; ARMEB-NEXT: and r0, r0, r1
; ARMEB-NEXT: uxtb r1, r0
; ARMEB-NEXT: and r1, r0, r1
; ARMEB-NEXT: mov r0, #0
; ARMEB-NEXT: cmp r1, r2
; ARMEB-NEXT: movweq r0, #1
@ -872,9 +870,8 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
;
; THUMB1-LABEL: test6:
; THUMB1: @ %bb.0: @ %entry
; THUMB1-NEXT: ldrb r0, [r0]
; THUMB1-NEXT: ands r0, r1
; THUMB1-NEXT: uxtb r3, r0
; THUMB1-NEXT: ldrb r3, [r0]
; THUMB1-NEXT: ands r3, r1
; THUMB1-NEXT: uxtb r2, r2
; THUMB1-NEXT: movs r0, #1
; THUMB1-NEXT: movs r1, #0
@ -889,8 +886,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
; THUMB2: @ %bb.0: @ %entry
; THUMB2-NEXT: ldrb r0, [r0]
; THUMB2-NEXT: uxtb r2, r2
; THUMB2-NEXT: ands r0, r1
; THUMB2-NEXT: uxtb r1, r0
; THUMB2-NEXT: ands r1, r0
; THUMB2-NEXT: movs r0, #0
; THUMB2-NEXT: cmp r1, r2
; THUMB2-NEXT: it eq

View File

@ -49,9 +49,10 @@ entry:
; CHECK-THUMBV6: mov [[EXPECTED:r[0-9]+]], r1
; CHECK-THUMBV6-NEXT: bl __sync_val_compare_and_swap_1
; CHECK-THUMBV6-NEXT: mov [[RES:r[0-9]+]], r0
; CHECK-THUMBV6-NEXT: uxtb [[EXPECTED_ZEXT:r[0-9]+]], [[EXPECTED]]
; CHECK-THUMBV6-NEXT: movs r0, #1
; CHECK-THUMBV6-NEXT: movs [[ZERO:r[0-9]+]], #0
; CHECK-THUMBV6-NEXT: cmp [[RES]], [[EXPECTED]]
; CHECK-THUMBV6-NEXT: cmp [[RES]], [[EXPECTED_ZEXT]]
; CHECK-THUMBV6-NEXT: beq [[END:.LBB[0-9_]+]]
; CHECK-THUMBV6-NEXT: mov r0, [[ZERO]]
; CHECK-THUMBV6-NEXT: [[END]]:

View File

@ -17,7 +17,8 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
; CHECK: bne [[RETRY]]
; CHECK: [[DONE]]:
; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]]
; CHECK: uxtb [[DESIRED_ZEXT:r[0-9]+]], [[DESIRED]]
; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED_ZEXT]]
; CHECK: {{moveq|movweq}} {{r[0-9]+}}, #1
; CHECK: dmb ish
%res = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst monotonic
@ -36,7 +37,8 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
; CHECK: bne [[RETRY]]
; CHECK: [[DONE]]:
; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]]
; CHECK: uxth [[DESIRED_ZEXT:r[0-9]+]], [[DESIRED]]
; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED_ZEXT]]
; CHECK: {{moveq|movweq}} {{r[0-9]+}}, #1
; CHECK: dmb ish
%res = cmpxchg i16* %addr, i16 %desired, i16 %new seq_cst monotonic

View File

@ -0,0 +1,15 @@
; RUN: llc < %s -mtriple=thumbv7-win32 -arm-global-merge | FileCheck %s
@x = global i32 0, align 4
@y = dllexport global i32 0, align 4
define void @f1(i32 %a1, i32 %a2) {
; CHECK: f1:
; CHECK: movw [[REG1:r[0-9]+]], :lower16:x
; CHECK: movt [[REG1]], :upper16:x
store i32 %a1, i32* @x, align 4
store i32 %a2, i32* @y, align 4
ret void
}
; CHECK-NOT: .L_MergedGlobals
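
A plausible C++ shape for this test's source (hypothetical, reconstructed from
the IR): because y is dllexport it must remain individually addressable, so
nothing is folded into .L_MergedGlobals here.

// Hypothetical source; the dllexport attribute is what CHECK-NOT guards.
int x;                          // addressed directly (movw/movt :lower16:x)
__declspec(dllexport) int y;    // exported, so kept as a distinct symbol

void f1(int a1, int a2) {
  x = a1;
  y = a2;
}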

View File

@ -1,8 +1,9 @@
; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge | FileCheck %s --check-prefix=CHECK-MERGE
; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=true | FileCheck %s --check-prefix=CHECK-MERGE
; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=false | FileCheck %s --check-prefix=CHECK-NO-MERGE
; RUN: llc < %s -mtriple=arm-macho -arm-global-merge | FileCheck %s --check-prefix=CHECK-NO-MERGE
; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -relocation-model=pic | FileCheck %s --check-prefix=CHECK-NO-MERGE
; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge | FileCheck %s --check-prefixes=CHECK,CHECK-MERGE
; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=true | FileCheck %s --check-prefixes=CHECK,CHECK-MERGE
; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=false | FileCheck %s --check-prefixes=CHECK,CHECK-NO-MERGE
; RUN: llc < %s -mtriple=arm-macho -arm-global-merge | FileCheck %s --check-prefixes=CHECK,CHECK-NO-MERGE
; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -relocation-model=pic | FileCheck %s --check-prefixes=CHECK,CHECK-NO-MERGE
; RUN: llc < %s -mtriple=thumbv7-win32 -arm-global-merge | FileCheck %s --check-prefixes=CHECK-WIN32
@x = global i32 0, align 4
@y = global i32 0, align 4
@ -10,10 +11,13 @@
define void @f1(i32 %a1, i32 %a2) {
;CHECK: f1:
;CHECK: ldr {{r[0-9]+}}, [[LABEL1:\.LCPI[0-9]+_[0-9]]]
;CHECK: ldr {{r[0-9]+}}, [[LABEL1:\.?LCPI[0-9]+_[0-9]]]
;CHECK: [[LABEL1]]:
;CHECK-MERGE: .long .L_MergedGlobals
;CHECK-NO-MERGE: .long {{_?x}}
;CHECK-WIN32: f1:
;CHECK-WIN32: movw [[REG1:r[0-9]+]], :lower16:.L_MergedGlobals
;CHECK-WIN32: movt [[REG1]], :upper16:.L_MergedGlobals
store i32 %a1, i32* @x, align 4
store i32 %a2, i32* @y, align 4
ret void
@ -21,10 +25,13 @@ define void @f1(i32 %a1, i32 %a2) {
define void @g1(i32 %a1, i32 %a2) {
;CHECK: g1:
;CHECK: ldr {{r[0-9]+}}, [[LABEL2:\.LCPI[0-9]+_[0-9]]]
;CHECK: ldr {{r[0-9]+}}, [[LABEL2:\.?LCPI[0-9]+_[0-9]]]
;CHECK: [[LABEL2]]:
;CHECK-MERGE: .long .L_MergedGlobals
;CHECK-NO-MERGE: .long {{_?y}}
;CHECK-WIN32: g1:
;CHECK-WIN32: movw [[REG2:r[0-9]+]], :lower16:.L_MergedGlobals
;CHECK-WIN32: movt [[REG2]], :upper16:.L_MergedGlobals
store i32 %a1, i32* @y, align 4
store i32 %a2, i32* @z, align 4
ret void
@ -35,6 +42,7 @@ define void @g1(i32 %a1, i32 %a2) {
;CHECK-MERGE: .type .L_MergedGlobals,%object
;CHECK-MERGE: .local .L_MergedGlobals
;CHECK-MERGE: .comm .L_MergedGlobals,12,4
;CHECK-WIN32: .lcomm .L_MergedGlobals,12,4
;CHECK-MERGE: .globl x
;CHECK-MERGE: x = .L_MergedGlobals
@ -45,3 +53,10 @@ define void @g1(i32 %a1, i32 %a2) {
;CHECK-MERGE: .globl z
;CHECK-MERGE: z = .L_MergedGlobals+8
;CHECK-MERGE: .size z, 4
;CHECK-WIN32: .globl x
;CHECK-WIN32: x = .L_MergedGlobals
;CHECK-WIN32: .globl y
;CHECK-WIN32: y = .L_MergedGlobals+4
;CHECK-WIN32: .globl z
;CHECK-WIN32: z = .L_MergedGlobals+8

View File

@ -0,0 +1,67 @@
# RUN: llc -o - %s -mtriple=armv7-- -verify-machineinstrs -run-pass=peephole-opt | FileCheck %s
#
# Make sure we do not crash on this input.
# Note that this input could in principle be optimized, but right now we don't
# have this case implemented, so the output should simply be unchanged.
#
# CHECK-LABEL: name: func
# CHECK: body: |
# CHECK: bb.0:
# CHECK: Bcc %bb.2, 1, undef %cpsr
#
# CHECK: bb.1:
# CHECK: %0:dpr = IMPLICIT_DEF
# CHECK: %1:gpr, %2:gpr = VMOVRRD %0, 14, %noreg
# CHECK: B %bb.3
#
# CHECK: bb.2:
# CHECK: %3:spr = IMPLICIT_DEF
# CHECK: %4:gpr = VMOVRS %3, 14, %noreg
#
# CHECK: bb.3:
# CHECK: %5:gpr = PHI %1, %bb.1, %4, %bb.2
# CHECK: %6:spr = VMOVSR %5, 14, %noreg
---
name: func0
tracksRegLiveness: true
body: |
bb.0:
Bcc %bb.2, 1, undef %cpsr
bb.1:
%0:dpr = IMPLICIT_DEF
%1:gpr, %2:gpr = VMOVRRD %0:dpr, 14, %noreg
B %bb.3
bb.2:
%3:spr = IMPLICIT_DEF
%4:gpr = VMOVRS %3:spr, 14, %noreg
bb.3:
%5:gpr = PHI %1, %bb.1, %4, %bb.2
%6:spr = VMOVSR %5, 14, %noreg
...
# CHECK-LABEL: name: func1
# CHECK: %6:spr = PHI %0, %bb.1, %2, %bb.2
# CHECK: %7:spr = COPY %6
---
name: func1
tracksRegLiveness: true
body: |
bb.0:
Bcc %bb.2, 1, undef %cpsr
bb.1:
%1:spr = IMPLICIT_DEF
%0:gpr = VMOVRS %1, 14, %noreg
B %bb.3
bb.2:
%3:spr = IMPLICIT_DEF
%2:gpr = VMOVRS %3:spr, 14, %noreg
bb.3:
%4:gpr = PHI %0, %bb.1, %2, %bb.2
%5:spr = VMOVSR %4, 14, %noreg
...

View File

@ -0,0 +1,94 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Make sure that a negative value for the compare-and-swap is zero extended
; from i8/i16 to i32 since it will be compared for equality.
; RUN: llc -mtriple=powerpc64le-linux-gnu -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=powerpc64le-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-P7
@str = private unnamed_addr constant [46 x i8] c"FAILED: __atomic_compare_exchange_n() failed.\00"
@str.1 = private unnamed_addr constant [59 x i8] c"FAILED: __atomic_compare_exchange_n() set the wrong value.\00"
@str.2 = private unnamed_addr constant [7 x i8] c"PASSED\00"
define signext i32 @main() {
; CHECK-LABEL: main:
; CHECK: li 3, -32477
; CHECK: lis 12, 0
; CHECK: li 6, 234
; CHECK: sth 3, 46(1)
; CHECK: ori 4, 12, 33059
; CHECK: sync
; CHECK: .LBB0_1: # %L.entry
; CHECK: lharx 3, 0, 5
; CHECK: cmpw 4, 3
; CHECK: bne 0, .LBB0_3
; CHECK: sthcx. 6, 0, 5
; CHECK: bne 0, .LBB0_1
; CHECK: b .LBB0_4
; CHECK: .LBB0_3: # %L.entry
; CHECK: sthcx. 3, 0, 5
; CHECK: .LBB0_4: # %L.entry
; CHECK: cmplwi 3, 33059
; CHECK: lwsync
; CHECK: lhz 3, 46(1)
; CHECK: cmplwi 3, 234
;
; CHECK-P7-LABEL: main:
; CHECK-P7: lis 4, 0
; CHECK-P7: li 7, 0
; CHECK-P7: li 3, -32477
; CHECK-P7: sth 3, 46(1)
; CHECK-P7: li 5, 234
; CHECK-P7: ori 4, 4, 33059
; CHECK-P7: rlwinm 3, 6, 3, 27, 27
; CHECK-P7: ori 7, 7, 65535
; CHECK-P7: sync
; CHECK-P7: slw 8, 5, 3
; CHECK-P7: slw 5, 7, 3
; CHECK-P7: slw 9, 4, 3
; CHECK-P7: and 7, 8, 5
; CHECK-P7: rldicr 4, 6, 0, 61
; CHECK-P7: and 8, 9, 5
; CHECK-P7: .LBB0_1: # %L.entry
; CHECK-P7: lwarx 9, 0, 4
; CHECK-P7: and 6, 9, 5
; CHECK-P7: cmpw 0, 6, 8
; CHECK-P7: bne 0, .LBB0_3
; CHECK-P7: andc 9, 9, 5
; CHECK-P7: or 9, 9, 7
; CHECK-P7: stwcx. 9, 0, 4
; CHECK-P7: bne 0, .LBB0_1
; CHECK-P7: b .LBB0_4
; CHECK-P7: .LBB0_3: # %L.entry
; CHECK-P7: stwcx. 9, 0, 4
; CHECK-P7: .LBB0_4: # %L.entry
; CHECK-P7: srw 3, 6, 3
; CHECK-P7: lwsync
; CHECK-P7: cmplwi 3, 33059
; CHECK-P7: lhz 3, 46(1)
; CHECK-P7: cmplwi 3, 234
L.entry:
%value.addr = alloca i16, align 2
store i16 -32477, i16* %value.addr, align 2
%0 = cmpxchg i16* %value.addr, i16 -32477, i16 234 seq_cst seq_cst
%1 = extractvalue { i16, i1 } %0, 1
br i1 %1, label %L.B0000, label %L.B0003
L.B0003: ; preds = %L.entry
%puts = call i32 @puts(i8* getelementptr inbounds ([46 x i8], [46 x i8]* @str, i64 0, i64 0))
ret i32 1
L.B0000: ; preds = %L.entry
%2 = load i16, i16* %value.addr, align 2
%3 = icmp eq i16 %2, 234
br i1 %3, label %L.B0001, label %L.B0005
L.B0005: ; preds = %L.B0000
%puts1 = call i32 @puts(i8* getelementptr inbounds ([59 x i8], [59 x i8]* @str.1, i64 0, i64 0))
ret i32 1
L.B0001: ; preds = %L.B0000
%puts2 = call i32 @puts(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @str.2, i64 0, i64 0))
ret i32 0
}
; Function Attrs: nounwind
declare i32 @puts(i8* nocapture readonly) #0
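
The comment at the top of this test, restated as a hedged C++ analogue (an
illustration of the likely source, not the test's actual origin): the expected
value -32477 is a 16-bit quantity, so the equality compare must see it
zero-extended (0x8123 == 33059), not sign-extended; hence the
"cmplwi 3, 33059" in the CHECK lines.

#include <atomic>
#include <cstdio>

int main() {
  std::atomic<short> value(-32477);
  short expected = -32477;  // 0x8123 when reinterpreted as 16-bit unsigned
  if (!value.compare_exchange_strong(expected, 234))
    std::puts("FAILED: __atomic_compare_exchange_n() failed.");
  else if (value.load() != 234)
    std::puts("FAILED: __atomic_compare_exchange_n() set the wrong value.");
  else
    std::puts("PASSED");
  return 0;
}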

View File

@ -404,6 +404,7 @@ define void @test39() {
define void @test40(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test40:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: b .LBB40_2
; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB40_1:
@ -423,6 +424,7 @@ define void @test40(i8* %ptr, i8 %cmp, i8 %val) {
define void @test41(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test41:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: .LBB41_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
@ -444,6 +446,7 @@ define void @test41(i8* %ptr, i8 %cmp, i8 %val) {
define void @test42(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test42:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: .LBB42_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
@ -465,6 +468,7 @@ define void @test42(i8* %ptr, i8 %cmp, i8 %val) {
define void @test43(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test43:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: b .LBB43_2
; PPC64LE-NEXT: .p2align 5
@ -485,6 +489,7 @@ define void @test43(i8* %ptr, i8 %cmp, i8 %val) {
define void @test44(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test44:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: b .LBB44_2
; PPC64LE-NEXT: .p2align 5
@ -505,6 +510,7 @@ define void @test44(i8* %ptr, i8 %cmp, i8 %val) {
define void @test45(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test45:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .LBB45_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@ -527,6 +533,7 @@ define void @test45(i8* %ptr, i8 %cmp, i8 %val) {
define void @test46(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test46:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .LBB46_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@ -549,6 +556,7 @@ define void @test46(i8* %ptr, i8 %cmp, i8 %val) {
define void @test47(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test47:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB47_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@ -571,6 +579,7 @@ define void @test47(i8* %ptr, i8 %cmp, i8 %val) {
define void @test48(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test48:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB48_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@ -593,6 +602,7 @@ define void @test48(i8* %ptr, i8 %cmp, i8 %val) {
define void @test49(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test49:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB49_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@ -615,6 +625,7 @@ define void @test49(i8* %ptr, i8 %cmp, i8 %val) {
define void @test50(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test50:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: b .LBB50_2
; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB50_1:
@ -634,6 +645,7 @@ define void @test50(i16* %ptr, i16 %cmp, i16 %val) {
define void @test51(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test51:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: .LBB51_1:
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
@ -655,6 +667,7 @@ define void @test51(i16* %ptr, i16 %cmp, i16 %val) {
define void @test52(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test52:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: .LBB52_1:
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
@ -676,6 +689,7 @@ define void @test52(i16* %ptr, i16 %cmp, i16 %val) {
define void @test53(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test53:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: b .LBB53_2
; PPC64LE-NEXT: .p2align 5
@ -696,6 +710,7 @@ define void @test53(i16* %ptr, i16 %cmp, i16 %val) {
define void @test54(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test54:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: b .LBB54_2
; PPC64LE-NEXT: .p2align 5
@ -716,6 +731,7 @@ define void @test54(i16* %ptr, i16 %cmp, i16 %val) {
define void @test55(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test55:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .LBB55_1:
; PPC64LE-NEXT: lharx 6, 0, 3
@ -738,6 +754,7 @@ define void @test55(i16* %ptr, i16 %cmp, i16 %val) {
define void @test56(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test56:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .LBB56_1:
; PPC64LE-NEXT: lharx 6, 0, 3
@ -760,6 +777,7 @@ define void @test56(i16* %ptr, i16 %cmp, i16 %val) {
define void @test57(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test57:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB57_1:
; PPC64LE-NEXT: lharx 6, 0, 3
@ -782,6 +800,7 @@ define void @test57(i16* %ptr, i16 %cmp, i16 %val) {
define void @test58(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test58:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB58_1:
; PPC64LE-NEXT: lharx 6, 0, 3
@ -804,6 +823,7 @@ define void @test58(i16* %ptr, i16 %cmp, i16 %val) {
define void @test59(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test59:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB59_1:
; PPC64LE-NEXT: lharx 6, 0, 3
@ -1248,6 +1268,7 @@ define void @test79(i64* %ptr, i64 %cmp, i64 %val) {
define void @test80(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test80:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: b .LBB80_2
; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB80_1:
@ -1267,6 +1288,7 @@ define void @test80(i8* %ptr, i8 %cmp, i8 %val) {
define void @test81(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test81:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: .LBB81_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
@ -1288,6 +1310,7 @@ define void @test81(i8* %ptr, i8 %cmp, i8 %val) {
define void @test82(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test82:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: .LBB82_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
@ -1309,6 +1332,7 @@ define void @test82(i8* %ptr, i8 %cmp, i8 %val) {
define void @test83(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test83:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: b .LBB83_2
; PPC64LE-NEXT: .p2align 5
@ -1329,6 +1353,7 @@ define void @test83(i8* %ptr, i8 %cmp, i8 %val) {
define void @test84(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test84:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: b .LBB84_2
; PPC64LE-NEXT: .p2align 5
@ -1349,6 +1374,7 @@ define void @test84(i8* %ptr, i8 %cmp, i8 %val) {
define void @test85(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test85:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .LBB85_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@ -1371,6 +1397,7 @@ define void @test85(i8* %ptr, i8 %cmp, i8 %val) {
define void @test86(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test86:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .LBB86_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@ -1393,6 +1420,7 @@ define void @test86(i8* %ptr, i8 %cmp, i8 %val) {
define void @test87(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test87:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB87_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@ -1415,6 +1443,7 @@ define void @test87(i8* %ptr, i8 %cmp, i8 %val) {
define void @test88(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test88:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB88_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@ -1437,6 +1466,7 @@ define void @test88(i8* %ptr, i8 %cmp, i8 %val) {
define void @test89(i8* %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test89:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB89_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
@ -1459,6 +1489,7 @@ define void @test89(i8* %ptr, i8 %cmp, i8 %val) {
define void @test90(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test90:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: b .LBB90_2
; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB90_1:
@ -1478,6 +1509,7 @@ define void @test90(i16* %ptr, i16 %cmp, i16 %val) {
define void @test91(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test91:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: .LBB91_1:
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
@ -1499,6 +1531,7 @@ define void @test91(i16* %ptr, i16 %cmp, i16 %val) {
define void @test92(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test92:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: .LBB92_1:
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
@ -1520,6 +1553,7 @@ define void @test92(i16* %ptr, i16 %cmp, i16 %val) {
define void @test93(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test93:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: b .LBB93_2
; PPC64LE-NEXT: .p2align 5
@ -1540,6 +1574,7 @@ define void @test93(i16* %ptr, i16 %cmp, i16 %val) {
define void @test94(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test94:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: b .LBB94_2
; PPC64LE-NEXT: .p2align 5
@ -1560,6 +1595,7 @@ define void @test94(i16* %ptr, i16 %cmp, i16 %val) {
define void @test95(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test95:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .LBB95_1:
; PPC64LE-NEXT: lharx 6, 0, 3
@ -1582,6 +1618,7 @@ define void @test95(i16* %ptr, i16 %cmp, i16 %val) {
define void @test96(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test96:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .LBB96_1:
; PPC64LE-NEXT: lharx 6, 0, 3
@ -1604,6 +1641,7 @@ define void @test96(i16* %ptr, i16 %cmp, i16 %val) {
define void @test97(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test97:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB97_1:
; PPC64LE-NEXT: lharx 6, 0, 3
@ -1626,6 +1664,7 @@ define void @test97(i16* %ptr, i16 %cmp, i16 %val) {
define void @test98(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test98:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB98_1:
; PPC64LE-NEXT: lharx 6, 0, 3
@ -1648,6 +1687,7 @@ define void @test98(i16* %ptr, i16 %cmp, i16 %val) {
define void @test99(i16* %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test99:
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: .LBB99_1:
; PPC64LE-NEXT: lharx 6, 0, 3
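
Every hunk in this file adds one rlwinm ahead of the CAS loop. In scalar terms
(an illustrative restatement, not compiler output), a rotate by zero with mask
bits 24-31 or 16-31 is simply a zero-extension of the expected value, so the
cmpw inside the loop compares like with like:

#include <cstdint>

// rlwinm rD, rS, 0, 24, 31  ==  rD = rS & 0xFF    (zero-extend i8)
// rlwinm rD, rS, 0, 16, 31  ==  rD = rS & 0xFFFF  (zero-extend i16)
uint32_t zextByte(uint32_t r) { return r & 0xFFu; }
uint32_t zextHalf(uint32_t r) { return r & 0xFFFFu; }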

View File

@ -4780,3 +4780,42 @@ define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mem_mask1(<8 x doub
ret <2 x double> %res
}
; PR35977
define void @test_zext_v8i8_to_v8i16(<8 x i8>* %arg, <8 x i16>* %arg1) {
; CHECK-LABEL: test_zext_v8i8_to_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; CHECK-NEXT: vpsllw $8, %xmm0, %xmm0
; CHECK-NEXT: vmovdqa %xmm0, (%rsi)
; CHECK-NEXT: retq
%tmp = getelementptr <8 x i8>, <8 x i8>* %arg, i32 0
%tmp2 = load <8 x i8>, <8 x i8>* %tmp
%tmp3 = extractelement <8 x i8> %tmp2, i32 0
%tmp4 = zext i8 %tmp3 to i16
%tmp5 = insertelement <8 x i16> undef, i16 %tmp4, i32 0
%tmp6 = extractelement <8 x i8> %tmp2, i32 1
%tmp7 = zext i8 %tmp6 to i16
%tmp8 = insertelement <8 x i16> %tmp5, i16 %tmp7, i32 1
%tmp9 = extractelement <8 x i8> %tmp2, i32 2
%tmp10 = zext i8 %tmp9 to i16
%tmp11 = insertelement <8 x i16> %tmp8, i16 %tmp10, i32 2
%tmp12 = extractelement <8 x i8> %tmp2, i32 3
%tmp13 = zext i8 %tmp12 to i16
%tmp14 = insertelement <8 x i16> %tmp11, i16 %tmp13, i32 3
%tmp15 = extractelement <8 x i8> %tmp2, i32 4
%tmp16 = zext i8 %tmp15 to i16
%tmp17 = insertelement <8 x i16> %tmp14, i16 %tmp16, i32 4
%tmp18 = extractelement <8 x i8> %tmp2, i32 5
%tmp19 = zext i8 %tmp18 to i16
%tmp20 = insertelement <8 x i16> %tmp17, i16 %tmp19, i32 5
%tmp21 = extractelement <8 x i8> %tmp2, i32 6
%tmp22 = zext i8 %tmp21 to i16
%tmp23 = insertelement <8 x i16> %tmp20, i16 %tmp22, i32 6
%tmp24 = extractelement <8 x i8> %tmp2, i32 7
%tmp25 = zext i8 %tmp24 to i16
%tmp26 = insertelement <8 x i16> %tmp23, i16 %tmp25, i32 7
%tmp27 = shl <8 x i16> %tmp26, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%tmp28 = getelementptr <8 x i16>, <8 x i16>* %arg1, i32 0
store <8 x i16> %tmp27, <8 x i16>* %tmp28
ret void
}

View File

@ -1,10 +1,13 @@
; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck -check-prefixes=CHECK,BZERO %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck -check-prefixes=CHECK,BZERO %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck -check-prefixes=CHECK,NOBZERO %s
; RUN: llc < %s -mtriple=x86_64-apple-ios10.0-simulator | FileCheck -check-prefixes=CHECK,NOBZERO %s
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
; CHECK-LABEL: foo:
; CHECK: {{calll|callq}} ___bzero
; BZERO: {{calll|callq}} ___bzero
; NOBZERO-NOT: bzero
define void @foo(i8* %p, i32 %len) {
call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 %len, i32 1, i1 false)
ret void
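
For reference, a C++-level sketch of @foo (assuming the IR came from a plain
zeroing memset): BZERO targets may lower this to ___bzero, while NOBZERO
targets keep the memset, which is exactly what the two prefixes split.

#include <cstring>

void foo(char *p, int len) {
  std::memset(p, 0, static_cast<std::size_t>(len));
}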

View File

@ -19,7 +19,8 @@ entry:
%.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1
ret { i64, i64 } %.fca.1.insert
}
; CHECK: lock cmpxchg16b
; CHECK: lock
; CHECK-NEXT: cmpxchg16b
attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind }

View File

@ -0,0 +1,36 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux %s -o - | FileCheck %s
@x = global i8 0, align 1
@y = global i32 0, align 4
@z = global i24 0, align 4
define void @PR35761(i32 %call) {
; CHECK-LABEL: PR35761:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movzbl {{.*}}(%rip), %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: movzbl {{.*}}(%rip), %ecx
; CHECK-NEXT: xorl $255, %ecx
; CHECK-NEXT: orl %eax, %ecx
; CHECK-NEXT: movw %cx, {{.*}}(%rip)
; CHECK-NEXT: movb $0, z+{{.*}}(%rip)
; CHECK-NEXT: retq
entry:
%0 = load i8, i8* @x, align 1
%tobool = trunc i8 %0 to i1
%conv = zext i1 %tobool to i32
%or = or i32 32767, %call
%neg = xor i32 %or, -1
%neg1 = xor i32 %neg, -1
%1 = load i32, i32* @y, align 4
%xor = xor i32 %neg1, %1
%or2 = or i32 %conv, %xor
%conv3 = trunc i32 %or2 to i8
%bf.load = load i24, i24* @z, align 4
%2 = zext i8 %conv3 to i24
%bf.value = and i24 %2, 4194303
store i24 %bf.value, i24* @z, align 2
ret void
}

View File

@ -0,0 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-unknown-linux-gnu %s -o - -mattr=avx512bw | FileCheck %s
define void @test3(i32 %c, <64 x i1>* %ptr) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
; CHECK-NEXT: sbbl %ecx, %ecx
; CHECK-NEXT: kmovd %ecx, %k0
; CHECK-NEXT: kunpckdq %k0, %k0, %k0
; CHECK-NEXT: kmovq %k0, (%eax)
; CHECK-NEXT: retl
%cmp = icmp eq i32 %c, 0
%insert = insertelement <64 x i1> undef, i1 %cmp, i32 0
%shuf = shufflevector <64 x i1> %insert, <64 x i1> undef, <64 x i32> zeroinitializer
store <64 x i1> %shuf, <64 x i1>* %ptr
ret void
}

View File

@ -0,0 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s
%struct.S = type <{ i16, i24, [5 x i8], i8, i16, [2 x i8] }>
@z = global { i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] } { i16 -724, i8 94, i8 -18, i8 5, i8 undef, i8 96, i8 104, i8 -24, i8 10, i8 0, [5 x i8] undef }, align 8
@tf_3_var_136 = global i64 0, align 8
@.str = private unnamed_addr constant [6 x i8] c"%llu\0A\00", align 1
define void @PR35763() {
; CHECK-LABEL: PR35763:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movzwl {{.*}}(%rip), %eax
; CHECK-NEXT: movzwl z+{{.*}}(%rip), %ecx
; CHECK-NEXT: orl %eax, %ecx
; CHECK-NEXT: movq %rcx, {{.*}}(%rip)
; CHECK-NEXT: movl z+{{.*}}(%rip), %eax
; CHECK-NEXT: movzbl z+{{.*}}(%rip), %ecx
; CHECK-NEXT: shlq $32, %rcx
; CHECK-NEXT: orq %rax, %rcx
; CHECK-NEXT: movabsq $1090921758719, %rax # imm = 0xFE0000FFFF
; CHECK-NEXT: andq %rcx, %rax
; CHECK-NEXT: movl %eax, z+{{.*}}(%rip)
; CHECK-NEXT: shrq $32, %rax
; CHECK-NEXT: movb %al, z+{{.*}}(%rip)
; CHECK-NEXT: retq
entry:
%0 = load i16, i16* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 0), align 8
%conv = sext i16 %0 to i32
%bf.load = load i32, i32* bitcast (i24* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 1) to i32*), align 2
%bf.clear = and i32 %bf.load, 2097151
%bf.cast = zext i32 %bf.clear to i64
%conv1 = trunc i64 %bf.cast to i32
%or = or i32 %conv, %conv1
%conv2 = trunc i32 %or to i16
%conv3 = zext i16 %conv2 to i64
store i64 %conv3, i64* @tf_3_var_136, align 8
%bf.load4 = load i40, i40* bitcast ([5 x i8]* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 2) to i40*), align 2
%bf.clear5 = and i40 %bf.load4, -8589869057
store i40 %bf.clear5, i40* bitcast ([5 x i8]* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 2) to i40*), align 2
ret void
}

View File

@ -207,13 +207,12 @@ define <8 x i16> @var_shuffle_v8i16(<8 x i16> %v, <8 x i16> %indices) nounwind {
define <16 x i8> @var_shuffle_v16i8(<16 x i8> %v, <16 x i8> %indices) nounwind {
; SSSE3-LABEL: var_shuffle_v16i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufb %xmm0, %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: pshufb %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; AVX-LABEL: var_shuffle_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%index0 = extractelement <16 x i8> %indices, i32 0
%index1 = extractelement <16 x i8> %indices, i32 1
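
What the corrected pshufb operand order computes, as a scalar model
(illustrative; pshufb additionally zeroes a lane when bit 7 of its index byte
is set, which this test does not exercise):

#include <array>
#include <cstdint>

// out[i] = v[indices[i] & 15] for each of the 16 byte lanes.
std::array<uint8_t, 16> varShuffleV16i8(const std::array<uint8_t, 16> &v,
                                        const std::array<uint8_t, 16> &indices) {
  std::array<uint8_t, 16> out{};
  for (int i = 0; i < 16; ++i)
    out[i] = v[indices[i] & 15];
  return out;
}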

View File

@ -1277,3 +1277,183 @@ define <8 x float> @var_shuffle_v8f32(<8 x float> %v, <8 x i32> %indices) nounwi
%ret7 = insertelement <8 x float> %ret6, float %v7, i32 7
ret <8 x float> %ret7
}
define <8 x i32> @pr35820(<4 x i32> %v, <8 x i32> %indices) unnamed_addr nounwind {
; AVX1-LABEL: pr35820:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpextrq $1, %xmm1, %r8
; AVX1-NEXT: movq %r8, %r10
; AVX1-NEXT: shrq $30, %r10
; AVX1-NEXT: vmovq %xmm1, %r9
; AVX1-NEXT: movq %r9, %rsi
; AVX1-NEXT: shrq $30, %rsi
; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andl $3, %r9d
; AVX1-NEXT: andl $12, %esi
; AVX1-NEXT: andl $3, %r8d
; AVX1-NEXT: andl $12, %r10d
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: movq %rax, %rdi
; AVX1-NEXT: shrq $30, %rdi
; AVX1-NEXT: vmovq %xmm0, %rcx
; AVX1-NEXT: movq %rcx, %rdx
; AVX1-NEXT: shrq $30, %rdx
; AVX1-NEXT: andl $3, %ecx
; AVX1-NEXT: andl $12, %edx
; AVX1-NEXT: andl $3, %eax
; AVX1-NEXT: andl $12, %edi
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT: vpinsrd $1, -24(%rsp,%rdx), %xmm0, %xmm0
; AVX1-NEXT: vpinsrd $2, -24(%rsp,%rax,4), %xmm0, %xmm0
; AVX1-NEXT: vpinsrd $3, -24(%rsp,%rdi), %xmm0, %xmm0
; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX1-NEXT: vpinsrd $1, -24(%rsp,%rsi), %xmm1, %xmm1
; AVX1-NEXT: vpinsrd $2, -24(%rsp,%r8,4), %xmm1, %xmm1
; AVX1-NEXT: vpinsrd $3, -24(%rsp,%r10), %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; INT256-LABEL: pr35820:
; INT256: # %bb.0: # %entry
; INT256-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0
; INT256-NEXT: vpermps %ymm0, %ymm1, %ymm0
; INT256-NEXT: retq
entry:
%tmp1 = extractelement <8 x i32> %indices, i32 0
%vecext2.8 = extractelement <4 x i32> %v, i32 %tmp1
%tmp2 = extractelement <8 x i32> %indices, i32 1
%vecext2.9 = extractelement <4 x i32> %v, i32 %tmp2
%tmp3 = extractelement <8 x i32> %indices, i32 2
%vecext2.10 = extractelement <4 x i32> %v, i32 %tmp3
%tmp4 = extractelement <8 x i32> %indices, i32 3
%vecext2.11 = extractelement <4 x i32> %v, i32 %tmp4
%tmp5 = extractelement <8 x i32> %indices, i32 4
%vecext2.12 = extractelement <4 x i32> %v, i32 %tmp5
%tmp6 = extractelement <8 x i32> %indices, i32 5
%vecext2.13 = extractelement <4 x i32> %v, i32 %tmp6
%tmp7 = extractelement <8 x i32> %indices, i32 6
%vecext2.14 = extractelement <4 x i32> %v, i32 %tmp7
%tmp8 = extractelement <8 x i32> %indices, i32 7
%vecext2.15 = extractelement <4 x i32> %v, i32 %tmp8
%tmp9 = insertelement <8 x i32> undef, i32 %vecext2.8, i32 0
%tmp10 = insertelement <8 x i32> %tmp9, i32 %vecext2.9, i32 1
%tmp11 = insertelement <8 x i32> %tmp10, i32 %vecext2.10, i32 2
%tmp12 = insertelement <8 x i32> %tmp11, i32 %vecext2.11, i32 3
%tmp13 = insertelement <8 x i32> %tmp12, i32 %vecext2.12, i32 4
%tmp14 = insertelement <8 x i32> %tmp13, i32 %vecext2.13, i32 5
%tmp15 = insertelement <8 x i32> %tmp14, i32 %vecext2.14, i32 6
%tmp16 = insertelement <8 x i32> %tmp15, i32 %vecext2.15, i32 7
ret <8 x i32> %tmp16
}
define <8 x float> @pr35820_float(<4 x float> %v, <8 x i32> %indices) unnamed_addr nounwind {
; AVX1-LABEL: pr35820_float:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpextrq $1, %xmm1, %r8
; AVX1-NEXT: movq %r8, %r10
; AVX1-NEXT: shrq $30, %r10
; AVX1-NEXT: vmovq %xmm1, %r9
; AVX1-NEXT: movq %r9, %rdx
; AVX1-NEXT: shrq $30, %rdx
; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andl $3, %r9d
; AVX1-NEXT: andl $12, %edx
; AVX1-NEXT: andl $3, %r8d
; AVX1-NEXT: andl $12, %r10d
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: movq %rax, %rdi
; AVX1-NEXT: shrq $30, %rdi
; AVX1-NEXT: vmovq %xmm0, %rcx
; AVX1-NEXT: movq %rcx, %rsi
; AVX1-NEXT: shrq $30, %rsi
; AVX1-NEXT: andl $3, %ecx
; AVX1-NEXT: andl $12, %esi
; AVX1-NEXT: andl $3, %eax
; AVX1-NEXT: andl $12, %edi
; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; INT256-LABEL: pr35820_float:
; INT256: # %bb.0: # %entry
; INT256-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0
; INT256-NEXT: vpermps %ymm0, %ymm1, %ymm0
; INT256-NEXT: retq
entry:
%tmp1 = extractelement <8 x i32> %indices, i32 0
%vecext2.8 = extractelement <4 x float> %v, i32 %tmp1
%tmp2 = extractelement <8 x i32> %indices, i32 1
%vecext2.9 = extractelement <4 x float> %v, i32 %tmp2
%tmp3 = extractelement <8 x i32> %indices, i32 2
%vecext2.10 = extractelement <4 x float> %v, i32 %tmp3
%tmp4 = extractelement <8 x i32> %indices, i32 3
%vecext2.11 = extractelement <4 x float> %v, i32 %tmp4
%tmp5 = extractelement <8 x i32> %indices, i32 4
%vecext2.12 = extractelement <4 x float> %v, i32 %tmp5
%tmp6 = extractelement <8 x i32> %indices, i32 5
%vecext2.13 = extractelement <4 x float> %v, i32 %tmp6
%tmp7 = extractelement <8 x i32> %indices, i32 6
%vecext2.14 = extractelement <4 x float> %v, i32 %tmp7
%tmp8 = extractelement <8 x i32> %indices, i32 7
%vecext2.15 = extractelement <4 x float> %v, i32 %tmp8
%tmp9 = insertelement <8 x float> undef, float %vecext2.8, i32 0
%tmp10 = insertelement <8 x float> %tmp9, float %vecext2.9, i32 1
%tmp11 = insertelement <8 x float> %tmp10, float %vecext2.10, i32 2
%tmp12 = insertelement <8 x float> %tmp11, float %vecext2.11, i32 3
%tmp13 = insertelement <8 x float> %tmp12, float %vecext2.12, i32 4
%tmp14 = insertelement <8 x float> %tmp13, float %vecext2.13, i32 5
%tmp15 = insertelement <8 x float> %tmp14, float %vecext2.14, i32 6
%tmp16 = insertelement <8 x float> %tmp15, float %vecext2.15, i32 7
ret <8 x float> %tmp16
}
define <4 x i32> @big_source(<8 x i32> %v, <4 x i32> %indices) unnamed_addr nounwind {
; AVX-LABEL: big_source:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rbp
; AVX-NEXT: movq %rsp, %rbp
; AVX-NEXT: andq $-32, %rsp
; AVX-NEXT: subq $64, %rsp
; AVX-NEXT: vmovq %xmm1, %rax
; AVX-NEXT: movq %rax, %rcx
; AVX-NEXT: shrq $30, %rcx
; AVX-NEXT: andl $28, %ecx
; AVX-NEXT: vpextrq $1, %xmm1, %rdx
; AVX-NEXT: movq %rdx, %rsi
; AVX-NEXT: sarq $32, %rsi
; AVX-NEXT: andl $7, %eax
; AVX-NEXT: andl $7, %edx
; AVX-NEXT: vmovaps %ymm0, (%rsp)
; AVX-NEXT: andl $7, %esi
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vpinsrd $1, (%rsp,%rcx), %xmm0, %xmm0
; AVX-NEXT: vpinsrd $2, (%rsp,%rdx,4), %xmm0, %xmm0
; AVX-NEXT: vpinsrd $3, (%rsp,%rsi,4), %xmm0, %xmm0
; AVX-NEXT: movq %rbp, %rsp
; AVX-NEXT: popq %rbp
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
entry:
%tmp1 = extractelement <4 x i32> %indices, i32 0
%vecext2.8 = extractelement <8 x i32> %v, i32 %tmp1
%tmp2 = extractelement <4 x i32> %indices, i32 1
%vecext2.9 = extractelement <8 x i32> %v, i32 %tmp2
%tmp3 = extractelement <4 x i32> %indices, i32 2
%vecext2.10 = extractelement <8 x i32> %v, i32 %tmp3
%tmp4 = extractelement <4 x i32> %indices, i32 3
%vecext2.11 = extractelement <8 x i32> %v, i32 %tmp4
%tmp9 = insertelement <4 x i32> undef, i32 %vecext2.8, i32 0
%tmp10 = insertelement <4 x i32> %tmp9, i32 %vecext2.9, i32 1
%tmp11 = insertelement <4 x i32> %tmp10, i32 %vecext2.10, i32 2
%tmp12 = insertelement <4 x i32> %tmp11, i32 %vecext2.11, i32 3
ret <4 x i32> %tmp12
}

View File

@ -135,3 +135,29 @@ Ltmp1:
.cv_filechecksums # File index to string table offset subsection
.cv_stringtable # String table
# CHECK-LABEL: FunctionLineTable [
# CHECK: LinkageName: ?baz@@YAXXZ
# CHECK: Flags: 0x1
# CHECK: CodeSize: 0x3D
# CHECK: FilenameSegment [
# CHECK: Filename: D:\src\llvm\build\t.cpp (0x0)
# CHECK: +0x0 [
# CHECK: LineNumberStart: 13
# CHECK: ]
# CHECK: +0x1 [
# CHECK: LineNumberStart: 14
# CHECK: ]
# CHECK: +0x8 [
# CHECK: LineNumberStart: 15
# CHECK: ]
# There shouldn't be any other line number entries because all the other
# .cv_locs are on line 15, where the top-level inline call site is.
# CHECK-NOT: LineNumberStart
# CHECK: +0x34 [
# CHECK: LineNumberStart: 16
# CHECK: ]
# CHECK: +0x3B [
# CHECK: LineNumberStart: 17
# CHECK: ]
# CHECK: ]
# CHECK: ]

View File

@ -99,7 +99,8 @@
// CHECK: shll $2, %eax
sall $2, %eax
// CHECK: rep movsb
// CHECK: rep
// CHECK-NEXT: movsb
rep # comment
movsb
@ -1557,3 +1558,38 @@ ptwriteq 0xdeadbeef(%rbx,%rcx,8)
// CHECK: ptwriteq %rax
// CHECK: encoding: [0xf3,0x48,0x0f,0xae,0xe0]
ptwriteq %rax
// __asm __volatile(
// "pushf \n\t"
// "popf \n\t"
// "rep \n\t"
// ".byte 0x0f, 0xa7, 0xd0"
// );
// CHECK: pushfq
// CHECK-NEXT: popfq
// CHECK-NEXT: rep
// CHECK-NEXT: .byte 15
// CHECK-NEXT: .byte 167
// CHECK-NEXT: .byte 208
pushfq
popfq
rep
.byte 15
.byte 167
.byte 208
// CHECK: lock
// CHECK: cmpxchgl
cmp $0, %edx
je 1f
lock
1: cmpxchgl %ecx,(%rdi)
// CHECK: rep
// CHECK-NEXT: byte
rep
.byte 0xa4 # movsb
// CHECK: lock
// This line has to be the last one in the file
lock

View File

@ -0,0 +1,46 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-scei-ps4"
%struct.CFVS = type { %struct.Vec }
%struct.Vec = type { i8 }
%struct.S = type { i8 }
define void @_ZN4CFVSD2Ev(%struct.CFVS* %this) unnamed_addr align 2 !dbg !8 {
entry:
%this.addr = alloca %struct.CFVS*, align 8
store %struct.CFVS* %this, %struct.CFVS** %this.addr, align 8
%this1 = load %struct.CFVS*, %struct.CFVS** %this.addr, align 8
%m_val = getelementptr inbounds %struct.CFVS, %struct.CFVS* %this1, i32 0, i32 0
ret void
}
declare dereferenceable(1) %struct.S* @_Z3Getv()
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 6.0.0 (trunk 321360) (llvm/trunk 321359)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "bz188598-b.cpp", directory: "")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 2}
!6 = !{i32 7, !"PIC Level", i32 2}
!8 = distinct !DISubprogram(name: "~CFVS", linkageName: "_ZN4CFVSD2Ev", scope: !9, file: !1, line: 2, type: !28, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0, declaration: !27, variables: !2)
!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "CFVS", file: !10, line: 7, size: 8, elements: !11, identifier: "_ZTS4CFVS")
!10 = !DIFile(filename: "./bz188598.h", directory: "")
!11 = !{!12, !27}
!12 = !DIDerivedType(tag: DW_TAG_member, name: "m_val", scope: !9, file: !10, line: 9, baseType: !13, size: 8)
!13 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Vec<&Get>", file: !10, line: 4, size: 8, elements: !14, templateParams: !19, identifier: "_ZTS3VecIXadL_Z3GetvEEE")
!14 = !{!35}
!19 = !{!20}
!20 = !DITemplateValueParameter(name: "F", type: !21, value: %struct.S* ()* @_Z3Getv)
!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !22, size: 64)
!22 = !DIDerivedType(tag: DW_TAG_typedef, name: "Func", file: !10, line: 2, baseType: !23)
!23 = !DISubroutineType(types: !24)
!24 = !{!35}
!27 = !DISubprogram(name: "~CFVS", scope: !9, file: !10, line: 8, type: !28, isLocal: false, isDefinition: false, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false)
!28 = !DISubroutineType(types: !29)
!29 = !{null, !30}
!30 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer)
!35 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)

View File

@ -0,0 +1,69 @@
; RUN: opt -module-summary -o %t1.bc %s
; RUN: opt -module-summary -o %t2.bc %S/Inputs/dicompositetype-unique2.ll
; RUN: llvm-lto --thinlto-action=run %t1.bc %t2.bc -thinlto-save-temps=%t3.
; RUN: llvm-dis %t3.0.3.imported.bc -o - | FileCheck %s
; RUN: llvm-lto2 run %t1.bc %t2.bc -o %t --save-temps \
; RUN: -r %t1.bc,_ZN1CD2Ev,pl \
; RUN: -r %t1.bc,_ZN4CFVSD2Ev,l \
; RUN: -r %t1.bc,_Z3Getv,l \
; RUN: -r %t2.bc,_ZN4CFVSD2Ev,pl \
; RUN: -r %t2.bc,_Z3Getv,l
; RUN: llvm-dis %t.1.3.import.bc -o - | FileCheck %s
; Only llvm-lto2 adds the dso_local keyword, hence the {{.*}}
; CHECK: define available_externally{{.*}} void @_ZN4CFVSD2Ev
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-scei-ps4"
%class.C = type <{ i32 (...)**, %class.A, %struct.CFVS, [6 x i8] }>
%class.A = type { %struct.Vec }
%struct.Vec = type { i8 }
%struct.CFVS = type { %struct.Vec }
%struct.S = type { i8 }
define void @_ZN1CD2Ev(%class.C* %this) unnamed_addr align 2 !dbg !8 {
entry:
%this.addr = alloca %class.C*, align 8
%this1 = load %class.C*, %class.C** %this.addr, align 8
%m = getelementptr inbounds %class.C, %class.C* %this1, i32 0, i32 2
call void @_ZN4CFVSD2Ev(%struct.CFVS* %m), !dbg !50
ret void
}
declare void @_ZN4CFVSD2Ev(%struct.CFVS*) unnamed_addr
declare dereferenceable(1) %struct.S* @_Z3Getv()
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 6.0.0 (trunk 321360) (llvm/trunk 321359)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "bz188598-a.cpp", directory: ".")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 2}
!6 = !{i32 7, !"PIC Level", i32 2}
!8 = distinct !DISubprogram(name: "~C", linkageName: "_ZN1CD2Ev", scope: !9, file: !1, line: 9, type: !47, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0, declaration: !46, variables: !2)
!9 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "C", file: !1, line: 5, size: 128, elements: !10, vtableHolder: !9, identifier: "_ZTS1C")
!10 = !{!38, !46}
!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Vec<&Get>", file: !16, line: 4, size: 8, elements: !17, templateParams: !22, identifier: "_ZTS3VecIXadL_Z3GetvEEE")
!16 = !DIFile(filename: "./bz188598.h", directory: ".")
!17 = !{!55}
!22 = !{!23}
!23 = !DITemplateValueParameter(name: "F", type: !24, value: %struct.S* ()* @_Z3Getv)
!24 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !25, size: 64)
!25 = !DIDerivedType(tag: DW_TAG_typedef, name: "Func", file: !16, line: 2, baseType: !26)
!26 = !DISubroutineType(types: !27)
!27 = !{!55}
!38 = !DIDerivedType(tag: DW_TAG_member, name: "m", scope: !9, file: !1, line: 7, baseType: !39, size: 8, offset: 72)
!39 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "CFVS", file: !16, line: 7, size: 8, elements: !40, identifier: "_ZTS4CFVS")
!40 = !{!41}
!41 = !DIDerivedType(tag: DW_TAG_member, name: "m_val", scope: !39, file: !16, line: 9, baseType: !15, size: 8)
!46 = !DISubprogram(name: "~C", scope: !9, file: !1, line: 6, type: !47, isLocal: false, isDefinition: false, scopeLine: 6, containingType: !9, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 0, flags: DIFlagPrototyped, isOptimized: false)
!47 = !DISubroutineType(types: !48)
!48 = !{!55}
!50 = !DILocation(line: 9, scope: !51)
!51 = distinct !DILexicalBlock(scope: !8, file: !1, line: 9)
!55 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)


@ -0,0 +1,19 @@
; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-select=true %s | FileCheck %s --check-prefix=CHECK
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
; Select when both offset and scale reg are present.
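; Both arms of the select share the base %g (= %b + %scale), so a sunk
; address would need base, index and offset registers at once; no sunk
; address computation (CodeGenPrepare names these "sunkaddr") should appear.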
define i64 @test1(i1 %c, i64* %b, i64 %scale) {
; CHECK-LABEL: @test1
entry:
; CHECK-LABEL: entry:
%g = getelementptr inbounds i64, i64* %b, i64 %scale
%g1 = getelementptr inbounds i64, i64* %g, i64 8
%g2 = getelementptr inbounds i64, i64* %g, i64 16
%s = select i1 %c, i64* %g1, i64* %g2
; CHECK-NOT: sunkaddr
%v = load i64 , i64* %s, align 8
ret i64 %v
}


@ -1,4 +1,5 @@
; RUN: opt -S -gvn-hoist < %s | FileCheck %s
; CHECK-LABEL: build_tree
; CHECK: load
; CHECK: load
; Check that the load is not hoisted because the call can potentially
@ -23,3 +24,47 @@ do.end: ; preds = %do.body
}
declare i1 @pqdownheap(i32)
@i = external hidden unnamed_addr global i32, align 4
@j = external hidden unnamed_addr global [573 x i32], align 4
@v = external global i1
; CHECK-LABEL: test
; CHECK-LABEL: do.end
; CHECK: load
; Check that the load is not hoisted because the call can potentially
; modify the global
define i32 @test() {
entry:
br label %for.cond
for.cond:
%a3 = load volatile i1, i1* @v
br i1 %a3, label %for.body, label %while.end
for.body:
br label %if.then
if.then:
%tmp4 = load i32, i32* @i, align 4
br label %for.cond
while.end:
br label %do.body
do.body:
%tmp9 = load i32, i32* getelementptr inbounds ([573 x i32], [573 x i32]* @j, i32 0, i32 1), align 4
%tmp10 = load i32, i32* @i, align 4
call void @fn()
%a1 = load volatile i1, i1* @v
br i1 %a1, label %do.body, label %do.end
do.end:
%tmp20 = load i32, i32* getelementptr inbounds ([573 x i32], [573 x i32]* @j, i32 0, i32 1), align 4
ret i32 %tmp20
}
declare void @fn()


@ -0,0 +1,43 @@
; RUN: opt < %s -jump-threading -disable-output -verify-dom-info
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@global = external local_unnamed_addr global i64, align 8
@global.1 = external local_unnamed_addr global i64, align 8
@global.2 = external local_unnamed_addr global i64, align 8
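; Reduced case for dominator tree verification: the conditional branch in bb3
; has the same successor (bb23) on both edges, a shape jump threading must
; handle without leaving the dominator tree stale.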
; Function Attrs: norecurse noreturn nounwind uwtable
define void @hoge() local_unnamed_addr #0 {
bb:
br label %bb1
bb1: ; preds = %bb26, %bb
%tmp = load i64, i64* @global, align 8, !tbaa !1
%tmp2 = icmp eq i64 %tmp, 0
br i1 %tmp2, label %bb27, label %bb3
bb3: ; preds = %bb1
%tmp4 = load i64, i64* @global.1, align 8, !tbaa !1
%tmp5 = icmp eq i64 %tmp4, 0
br i1 %tmp5, label %bb23, label %bb23
bb23: ; preds = %bb3, %bb3
br label %bb26
bb26: ; preds = %bb27, %bb23
br label %bb1
bb27: ; preds = %bb1
br label %bb26
}
attributes #0 = { norecurse noreturn nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.ident = !{!0}
!0 = !{!"clang version 7.0.0 "}
!1 = !{!2, !2, i64 0}
!2 = !{!"long", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}


@ -0,0 +1,75 @@
; RUN: opt < %s -jump-threading -disable-output -verify-dom-info
@global = external global i64, align 8
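; Another dominator tree verification case: the phi in bb7 mixes a constant
; with undef, bb7 branches on a literal false, and several blocks branch on
; undef, so jump threading ends up rewriting much of the CFG.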
define void @f() {
bb:
br label %bb1
bb1:
%tmp = load i64, i64* @global, align 8
%tmp2 = icmp eq i64 %tmp, 0
br i1 %tmp2, label %bb27, label %bb3
bb3:
%tmp4 = load i64, i64* @global, align 8
%tmp5 = icmp eq i64 %tmp4, 0
br i1 %tmp5, label %bb6, label %bb7
bb6:
br label %bb7
bb7:
%tmp8 = phi i1 [ true, %bb3 ], [ undef, %bb6 ]
%tmp9 = select i1 %tmp8, i64 %tmp4, i64 0
br i1 false, label %bb10, label %bb23
bb10:
%tmp11 = load i64, i64* @global, align 8
%tmp12 = icmp slt i64 %tmp11, 5
br i1 %tmp12, label %bb13, label %bb17
bb13:
br label %bb14
bb14:
br i1 undef, label %bb15, label %bb16
bb15:
unreachable
bb16:
br label %bb10
bb17:
br label %bb18
bb18:
br i1 undef, label %bb22, label %bb13
bb19:
br i1 undef, label %bb20, label %bb21
bb20:
unreachable
bb21:
br label %bb18
bb22:
br label %bb23
bb23:
br i1 undef, label %bb24, label %bb13
bb24:
br i1 undef, label %bb26, label %bb25
bb25:
br label %bb19
bb26:
br label %bb1
bb27:
br label %bb24
}


@ -0,0 +1,53 @@
; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@a = common local_unnamed_addr global i32 0, align 4
@b = common local_unnamed_addr global i8 0, align 1
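; The i8 induction %i8.iv advances by a trunc of the wider i32 induction, so
; the vectorizer is expected to materialize a separate <4 x i8> induction for
; the truncated value (IV_FROM_TRUNC in the checks below).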
; Function Attrs: norecurse nounwind uwtable
define void @doit1() local_unnamed_addr{
entry:
br label %for.body
for.body:
%main.iv = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%i8.iv = phi i8 [ 0, %entry ], [ %i8.add, %for.body ]
%i32.iv = phi i32 [ 0, %entry ], [ %i32.add, %for.body ]
%trunc.to.be.converted.to.new.iv = trunc i32 %i32.iv to i8
%i8.add = add i8 %i8.iv, %trunc.to.be.converted.to.new.iv
%noop.conv.under.pse = and i32 %i32.iv, 255
%i32.add = add nuw nsw i32 %noop.conv.under.pse, 9
%inc = add i32 %main.iv, 1
%tobool = icmp eq i32 %inc, 16
br i1 %tobool, label %for.cond.for.end_crit_edge, label %for.body
; CHECK-LABEL: @doit1(
; CHECK: vector.body:
; CHECK-NEXT: [[MAIN_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[MAIN_IV_NEXT:%.*]], [[VECTOR_BODY:%.*]] ]
; CHECK-NEXT: [[I8_IV:%.*]] = phi <4 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[I8_IV_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[I32_IV:%.*]] = phi <4 x i32> [ <i32 0, i32 9, i32 18, i32 27>, [[VECTOR_PH]] ], [ [[I32_IV_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[IV_FROM_TRUNC:%.*]] = phi <4 x i8> [ <i8 0, i8 9, i8 18, i8 27>, [[VECTOR_PH]] ], [ [[IV_FROM_TRUNC_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[MAIN_IV]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[MAIN_IV]], 0
; CHECK-NEXT: [[I8_IV_NEXT]] = add <4 x i8> [[I8_IV]], [[IV_FROM_TRUNC]]
; CHECK-NEXT: [[MAIN_IV_NEXT]] = add i32 [[MAIN_IV]], 4
; CHECK-NEXT: [[I32_IV_NEXT]] = add <4 x i32> [[I32_IV]], <i32 36, i32 36, i32 36, i32 36>
; CHECK-NEXT: [[IV_FROM_TRUNC_NEXT]] = add <4 x i8> [[IV_FROM_TRUNC]], <i8 36, i8 36, i8 36, i8 36>
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[MAIN_IV_NEXT]], 16
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
for.cond.for.end_crit_edge:
store i8 %i8.add, i8* @b, align 1
br label %for.end
for.end:
ret void
}


@ -0,0 +1,74 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
define void @mainTest(i32* %ptr) #0 {
; CHECK-LABEL: @mainTest(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[PTR:%.*]], null
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP:%.*]], label [[BAIL_OUT:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA5:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 2
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 3
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[TMP4]], [[TMP4]]
; CHECK-NEXT: [[TMP9:%.*]] = add i32 1, undef
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], undef
; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], undef
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], undef
; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], undef
; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP6]] to i64
; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP13]], undef
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP8]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP16]], 1
; CHECK-NEXT: [[OP_EXTRA3:%.*]] = add i32 [[OP_EXTRA]], [[TMP7]]
; CHECK-NEXT: [[OP_EXTRA4:%.*]] = add i32 [[OP_EXTRA3]], [[TMP6]]
; CHECK-NEXT: [[OP_EXTRA5]] = add i32 [[OP_EXTRA4]], [[TMP5]]
; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP15]], undef
; CHECK-NEXT: br label [[LOOP]]
; CHECK: bail_out:
; CHECK-NEXT: ret void
;
entry:
%cmp = icmp eq i32* %ptr, null
br i1 %cmp, label %loop, label %bail_out
loop:
%dummy_phi = phi i32 [ 1, %entry ], [ %18, %loop ]
%0 = load i32, i32 * %ptr , align 4
%1 = mul i32 %0, %0
%2 = add i32 1, %1
%3 = getelementptr inbounds i32, i32 * %ptr, i64 1
%4 = load i32, i32 * %3 , align 4
%5 = mul i32 %4, %4
%6 = add i32 %2, %4
%7 = add i32 %6, %5
%8 = getelementptr inbounds i32, i32 *%ptr, i64 2
%9 = load i32, i32 * %8 , align 4
%10 = mul i32 %9, %9
%11 = add i32 %7, %9
%12 = add i32 %11, %10
%13 = sext i32 %9 to i64
%14 = getelementptr inbounds i32, i32 *%ptr, i64 3
%15 = load i32, i32 * %14 , align 4
%16 = mul i32 %15, %15
%17 = add i32 %12, %15
%18 = add i32 %17, %16
br label %loop
bail_out:
ret void
}
attributes #0 = { "target-cpu"="westmere" }


@ -0,0 +1,64 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
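; Here the reduced values are four adds of the same loop-carried scalar %0;
; SLP is expected to rebuild them with insertelements, vectorize the
; ashr-exact chain, and again feed the reduction result back into the phis
; through OP_EXTRA adds.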
define void @test() #0 {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA3:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP6:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[DUMMY_ADD:%.*]] = add i16 0, 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> undef, i64 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[TMP0]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[TMP0]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[TMP0]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> <i64 3, i64 2, i64 1, i64 0>, [[TMP4]]
; CHECK-NEXT: [[TMP6]] = extractelement <4 x i64> [[TMP5]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP7]], 32
; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> <i64 1, i64 1, i64 1, i64 1>, [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = ashr exact <4 x i64> [[TMP8]], <i64 32, i64 32, i64 32, i64 32>
; CHECK-NEXT: [[SUM1:%.*]] = add i64 undef, undef
; CHECK-NEXT: [[SUM2:%.*]] = add i64 [[SUM1]], undef
; CHECK-NEXT: [[ZSUM:%.*]] = add i64 [[SUM2]], 0
; CHECK-NEXT: [[JOIN:%.*]] = add i64 undef, [[ZSUM]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i64> [[TMP9]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i64> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[BIN_RDX2]], i32 0
; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i64 [[TMP10]], 0
; CHECK-NEXT: [[OP_EXTRA3]] = add i64 [[OP_EXTRA]], [[TMP6]]
; CHECK-NEXT: [[LAST:%.*]] = add i64 [[JOIN]], undef
; CHECK-NEXT: br label [[LOOP]]
;
entry:
br label %loop
loop:
%dummy_phi = phi i64 [ 1, %entry ], [ %last, %loop ]
%0 = phi i64 [ 2, %entry ], [ %fork, %loop ]
%inc1 = add i64 %0, 1
%inc2 = add i64 %0, 2
%inc11 = add i64 1, %inc1
%exact1 = ashr exact i64 %inc11, 32
%inc3 = add i64 %0, 3
%dummy_add = add i16 0, 0
%inc12 = add i64 1, %inc2
%exact2 = ashr exact i64 %inc12, 32
%dummy_shl = shl i64 %inc3, 32
%inc13 = add i64 1, %inc3
%exact3 = ashr exact i64 %inc13, 32
%fork = add i64 %0, 0
%sum1 = add i64 %exact3, %exact2
%sum2 = add i64 %sum1, %exact1
%zsum = add i64 %sum2, 0
%sext22 = add i64 1, %fork
%exact4 = ashr exact i64 %sext22, 32
%join = add i64 %fork, %zsum
%last = add i64 %join, %exact4
br label %loop
}


@ -0,0 +1,48 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -verify -slp-vectorizer -o - -S -mtriple=x86_64-apple-macosx10.13.0 | FileCheck %s
@global = local_unnamed_addr global [6 x double] zeroinitializer, align 16
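; Two parallel fmul/fadd chains over @global[0..5] are expected to become
; <2 x double> operations, with the two fptosi/sext results reassembled into
; the { i64, i64 } return value through insertvalue.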
define { i64, i64 } @patatino(double %arg) {
; CHECK-LABEL: @patatino(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* bitcast ([6 x double]* @global to <2 x double>*), align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2) to <2 x double>*), align 16
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[ARG:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[ARG]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP0]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4) to <2 x double>*), align 16
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = fptosi <2 x double> [[TMP7]] to <2 x i32>
; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
; CHECK-NEXT: [[TMP10:%.*]] = trunc <2 x i64> [[TMP9]] to <2 x i32>
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP12]], 0
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1
; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64
; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i64, i64 } [[TMP16]], i64 [[TMP14]], 1
; CHECK-NEXT: ret { i64, i64 } [[TMP17]]
;
bb:
%tmp = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 0), align 16
%tmp1 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2), align 16
%tmp2 = fmul double %tmp1, %arg
%tmp3 = fadd double %tmp, %tmp2
%tmp4 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4), align 16
%tmp5 = fadd double %tmp4, %tmp3
%tmp6 = fptosi double %tmp5 to i32
%tmp7 = sext i32 %tmp6 to i64
%tmp8 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 1), align 8
%tmp9 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 3), align 8
%tmp10 = fmul double %tmp9, %arg
%tmp11 = fadd double %tmp8, %tmp10
%tmp12 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 5), align 8
%tmp13 = fadd double %tmp12, %tmp11
%tmp14 = fptosi double %tmp13 to i32
%tmp15 = sext i32 %tmp14 to i64
%tmp16 = insertvalue { i64, i64 } undef, i64 %tmp7, 0
%tmp17 = insertvalue { i64, i64 } %tmp16, i64 %tmp15, 1
ret { i64, i64 } %tmp17
}


@ -0,0 +1,27 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer < %s -S -o - -mtriple=x86_64-apple-macosx10.10.0 -mcpu=core2 | FileCheck %s
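; The half-to-float extract/convert/bitcast/insert sequence is expected to
; stay scalar on this target; the autogenerated checks below simply mirror
; the input.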
define void @_Z10fooConvertPDv4_xS0_S0_PKS_() {
; CHECK-LABEL: @_Z10fooConvertPDv4_xS0_S0_PKS_(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x half> undef, i32 4
; CHECK-NEXT: [[CONV_I_4_I:%.*]] = fpext half [[TMP0]] to float
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[CONV_I_4_I]] to i32
; CHECK-NEXT: [[VECINS_I_4_I:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i32 4
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x half> undef, i32 5
; CHECK-NEXT: [[CONV_I_5_I:%.*]] = fpext half [[TMP2]] to float
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[CONV_I_5_I]] to i32
; CHECK-NEXT: [[VECINS_I_5_I:%.*]] = insertelement <8 x i32> [[VECINS_I_4_I]], i32 [[TMP3]], i32 5
; CHECK-NEXT: ret void
;
entry:
%0 = extractelement <16 x half> undef, i32 4
%conv.i.4.i = fpext half %0 to float
%1 = bitcast float %conv.i.4.i to i32
%vecins.i.4.i = insertelement <8 x i32> undef, i32 %1, i32 4
%2 = extractelement <16 x half> undef, i32 5
%conv.i.5.i = fpext half %2 to float
%3 = bitcast float %conv.i.5.i to i32
%vecins.i.5.i = insertelement <8 x i32> %vecins.i.4.i, i32 %3, i32 5
ret void
}


@ -7,8 +7,8 @@ target triple = "x86_64-apple-macosx10.8.0"
define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@ -20,8 +20,8 @@ define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c)
; CHECK-NEXT: ret <4 x float> [[RD]]
;
; ZEROTHRESH-LABEL: @simple_select(
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@ -64,18 +64,18 @@ declare void @llvm.assume(i1) nounwind
; This entire tree is ephemeral, don't vectorize any of it.
define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_eph(
; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3
; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3
; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
@ -100,18 +100,18 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32>
; CHECK-NEXT: ret <4 x float> undef
;
; ZEROTHRESH-LABEL: @simple_select_eph(
; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2
; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3
; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2
; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3
; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
; ZEROTHRESH-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
; ZEROTHRESH-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
; ZEROTHRESH-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
@ -175,8 +175,8 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32>
; doesn't matter
define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_insert_out_of_order(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@ -188,8 +188,8 @@ define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float
; CHECK-NEXT: ret <4 x float> [[RD]]
;
; ZEROTHRESH-LABEL: @simple_select_insert_out_of_order(
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2
; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@ -233,8 +233,8 @@ declare void @f32_user(float) #0
; Multiple users of the final constructed vector
define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_users(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@ -247,8 +247,8 @@ define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32
; CHECK-NEXT: ret <4 x float> [[RD]]
;
; ZEROTHRESH-LABEL: @simple_select_users(
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
@ -291,18 +291,18 @@ define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32
; Unused insertelement
define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_no_users(
; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3
; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3
; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
@ -330,18 +330,18 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x
; CHECK-NEXT: ret <4 x float> [[RD]]
;
; ZEROTHRESH-LABEL: @simple_select_no_users(
; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2
; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3
; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2
; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3
; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
; ZEROTHRESH-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
; ZEROTHRESH-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
; ZEROTHRESH-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
@ -387,25 +387,25 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x
; to do this backwards
define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
; CHECK-LABEL: @reconstruct(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> %c, i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> %c, i32 1
; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP2]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 2
; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP3]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> %c, i32 3
; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP4]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[C]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[C]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[C]], i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP3]], i32 1
; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP2]], i32 2
; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP1]], i32 3
; CHECK-NEXT: ret <4 x i32> [[RD]]
;
; ZEROTHRESH-LABEL: @reconstruct(
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> %c, i32 0
; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> %c, i32 1
; ZEROTHRESH-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP2]], i32 1
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 2
; ZEROTHRESH-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP3]], i32 2
; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> %c, i32 3
; ZEROTHRESH-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP4]], i32 3
; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[C0]], i32 0
; ZEROTHRESH-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[C1]], i32 1
; ZEROTHRESH-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[C2]], i32 2
; ZEROTHRESH-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[C3]], i32 3
; ZEROTHRESH-NEXT: ret <4 x i32> [[RD]]
;
%c0 = extractelement <4 x i32> %c, i32 0
@ -421,8 +421,8 @@ define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_v2(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> %c, zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x float> %a, <2 x float> %b
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[C:%.*]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[RA:%.*]] = insertelement <2 x float> undef, float [[TMP3]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
@ -430,12 +430,12 @@ define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %
; CHECK-NEXT: ret <2 x float> [[RB]]
;
; ZEROTHRESH-LABEL: @simple_select_v2(
; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <2 x i32> %c, i32 0
; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <2 x i32> %c, i32 1
; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <2 x float> %a, i32 0
; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <2 x float> %a, i32 1
; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <2 x float> %b, i32 0
; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <2 x float> %b, i32 1
; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <2 x i32> [[C:%.*]], i32 0
; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <2 x i32> [[C]], i32 1
; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <2 x float> [[A:%.*]], i32 0
; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <2 x float> [[A]], i32 1
; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <2 x float> [[B:%.*]], i32 0
; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <2 x float> [[B]], i32 1
; ZEROTHRESH-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
; ZEROTHRESH-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
; ZEROTHRESH-NEXT: [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]]
@ -464,12 +464,12 @@ define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %
; (low cost threshold needed to force this to happen)
define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
; CHECK-LABEL: @simple_select_partial_vector(
; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
@ -485,12 +485,12 @@ define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b,
; CHECK-NEXT: ret <4 x float> [[RB]]
;
; ZEROTHRESH-LABEL: @simple_select_partial_vector(
; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
@ -530,7 +530,7 @@ define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b,
; must be rescheduled. The case here is from compiling Julia.
define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @reschedule_extract(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@ -542,7 +542,7 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
; CHECK-NEXT: ret <4 x float> [[V3]]
;
; ZEROTHRESH-LABEL: @reschedule_extract(
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; ZEROTHRESH-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@ -576,7 +576,7 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
; instructions that are erased.
define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @take_credit(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@ -588,7 +588,7 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
; CHECK-NEXT: ret <4 x float> [[V3]]
;
; ZEROTHRESH-LABEL: @take_credit(
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; ZEROTHRESH-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@ -622,10 +622,10 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
; CHECK-LABEL: @multi_tree(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double [[W:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[X:%.*]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Z:%.*]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>
; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0
@ -640,10 +640,10 @@ define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
;
; ZEROTHRESH-LABEL: @multi_tree(
; ZEROTHRESH-NEXT: entry:
; ZEROTHRESH-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3
; ZEROTHRESH-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double [[W:%.*]], i32 0
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[X:%.*]], i32 1
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 2
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Z:%.*]], i32 3
; ZEROTHRESH-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>
; ZEROTHRESH-NEXT: [[TMP5:%.*]] = fmul <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[TMP4]]
; ZEROTHRESH-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0
@ -675,7 +675,7 @@ entry:
define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr #0 {
; CHECK-LABEL: @_vadd256(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x float> %a, %b
; CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0
; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1
@ -696,7 +696,7 @@ define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr
;
; ZEROTHRESH-LABEL: @_vadd256(
; ZEROTHRESH-NEXT: entry:
; ZEROTHRESH-NEXT: [[TMP0:%.*]] = fadd <8 x float> %a, %b
; ZEROTHRESH-NEXT: [[TMP0:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0
; ZEROTHRESH-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1


@ -1,11 +1,30 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
; CHECK-LABEL: julia_2xdouble
; CHECK: load <2 x double>
; CHECK: load <2 x double>
; CHECK: fmul <2 x double>
; CHECK: fadd <2 x double>
define void @julia_2xdouble([2 x double]* sret, [2 x double]*, [2 x double]*, [2 x double]*) {
; CHECK-LABEL: @julia_2xdouble(
; CHECK-NEXT: top:
; CHECK-NEXT: [[PX0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP2:%.*]], i64 0, i64 0
; CHECK-NEXT: [[PY0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP3:%.*]], i64 0, i64 0
; CHECK-NEXT: [[PX1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP2]], i64 0, i64 1
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[PX0]] to <2 x double>*
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 4
; CHECK-NEXT: [[PY1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP3]], i64 0, i64 1
; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[PY0]] to <2 x double>*
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[PZ0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP1:%.*]], i64 0, i64 0
; CHECK-NEXT: [[PZ1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP1]], i64 0, i64 1
; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[PZ0]] to <2 x double>*
; CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[TMP9]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
; CHECK-NEXT: [[I0:%.*]] = insertvalue [2 x double] undef, double [[TMP12]], 0
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x double> [[TMP11]], i32 1
; CHECK-NEXT: [[I1:%.*]] = insertvalue [2 x double] [[I0]], double [[TMP13]], 1
; CHECK-NEXT: store [2 x double] [[I1]], [2 x double]* [[TMP0:%.*]], align 4
; CHECK-NEXT: ret void
;
top:
%px0 = getelementptr inbounds [2 x double], [2 x double]* %2, i64 0, i64 0
%x0 = load double, double* %px0, align 4
@ -29,12 +48,40 @@ top:
ret void
}
; CHECK-LABEL: julia_4xfloat
; CHECK: load <4 x float>
; CHECK: load <4 x float>
; CHECK: fmul <4 x float>
; CHECK: fadd <4 x float>
define void @julia_4xfloat([4 x float]* sret, [4 x float]*, [4 x float]*, [4 x float]*) {
; CHECK-LABEL: @julia_4xfloat(
; CHECK-NEXT: top:
; CHECK-NEXT: [[PX0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2:%.*]], i64 0, i64 0
; CHECK-NEXT: [[PY0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3:%.*]], i64 0, i64 0
; CHECK-NEXT: [[PX1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 1
; CHECK-NEXT: [[PY1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 1
; CHECK-NEXT: [[PX2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 2
; CHECK-NEXT: [[PY2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 2
; CHECK-NEXT: [[PX3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 3
; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[PX0]] to <4 x float>*
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4
; CHECK-NEXT: [[PY3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 3
; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[PY0]] to <4 x float>*
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[PZ0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1:%.*]], i64 0, i64 0
; CHECK-NEXT: [[PZ1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 1
; CHECK-NEXT: [[PZ2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 2
; CHECK-NEXT: [[PZ3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 3
; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[PZ0]] to <4 x float>*
; CHECK-NEXT: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x float> [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP11]], i32 0
; CHECK-NEXT: [[I0:%.*]] = insertvalue [4 x float] undef, float [[TMP12]], 0
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP11]], i32 1
; CHECK-NEXT: [[I1:%.*]] = insertvalue [4 x float] [[I0]], float [[TMP13]], 1
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[TMP11]], i32 2
; CHECK-NEXT: [[I2:%.*]] = insertvalue [4 x float] [[I1]], float [[TMP14]], 2
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i32 3
; CHECK-NEXT: [[I3:%.*]] = insertvalue [4 x float] [[I2]], float [[TMP15]], 3
; CHECK-NEXT: store [4 x float] [[I3]], [4 x float]* [[TMP0:%.*]], align 4
; CHECK-NEXT: ret void
;
top:
%px0 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0
%x0 = load float, float* %px0, align 4
@ -76,9 +123,27 @@ top:
ret void
}
; CHECK-LABEL: julia_load_array_of_float
; CHECK: fsub <4 x float>
define void @julia_load_array_of_float([4 x float]* %a, [4 x float]* %b, [4 x float]* %c) {
; CHECK-LABEL: @julia_load_array_of_float(
; CHECK-NEXT: top:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x float]* [[A:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x float], [4 x float]* [[A]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast [4 x float]* [[B:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x float], [4 x float]* [[B]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x float] undef, float [[TMP5]], 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
; CHECK-NEXT: [[C_ARR1:%.*]] = insertvalue [4 x float] [[C_ARR0]], float [[TMP6]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
; CHECK-NEXT: [[C_ARR2:%.*]] = insertvalue [4 x float] [[C_ARR1]], float [[TMP7]], 2
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
; CHECK-NEXT: [[C_ARR3:%.*]] = insertvalue [4 x float] [[C_ARR2]], float [[TMP8]], 3
; CHECK-NEXT: store [4 x float] [[C_ARR3]], [4 x float]* [[C:%.*]], align 4
; CHECK-NEXT: ret void
;
top:
%a_arr = load [4 x float], [4 x float]* %a, align 4
%a0 = extractvalue [4 x float] %a_arr, 0
@ -102,11 +167,27 @@ top:
ret void
}
; CHECK-LABEL: julia_load_array_of_i32
; CHECK: load <4 x i32>
; CHECK: load <4 x i32>
; CHECK: sub <4 x i32>
define void @julia_load_array_of_i32([4 x i32]* %a, [4 x i32]* %b, [4 x i32]* %c) {
; CHECK-LABEL: @julia_load_array_of_i32(
; CHECK-NEXT: top:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x i32]* [[A:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x i32], [4 x i32]* [[A]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast [4 x i32]* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x i32], [4 x i32]* [[B]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x i32] undef, i32 [[TMP5]], 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
; CHECK-NEXT: [[C_ARR1:%.*]] = insertvalue [4 x i32] [[C_ARR0]], i32 [[TMP6]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
; CHECK-NEXT: [[C_ARR2:%.*]] = insertvalue [4 x i32] [[C_ARR1]], i32 [[TMP7]], 2
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
; CHECK-NEXT: [[C_ARR3:%.*]] = insertvalue [4 x i32] [[C_ARR2]], i32 [[TMP8]], 3
; CHECK-NEXT: store [4 x i32] [[C_ARR3]], [4 x i32]* [[C:%.*]], align 4
; CHECK-NEXT: ret void
;
top:
%a_arr = load [4 x i32], [4 x i32]* %a, align 4
%a0 = extractvalue [4 x i32] %a_arr, 0
@ -132,9 +213,30 @@ top:
; Almost identical to the previous test, but for a type that should NOT be vectorized.
;
; CHECK-LABEL: julia_load_array_of_i16
; CHECK-NOT: i2>
define void @julia_load_array_of_i16([4 x i16]* %a, [4 x i16]* %b, [4 x i16]* %c) {
; CHECK-LABEL: @julia_load_array_of_i16(
; CHECK-NEXT: top:
; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x i16], [4 x i16]* [[A:%.*]], align 4
; CHECK-NEXT: [[A0:%.*]] = extractvalue [4 x i16] [[A_ARR]], 0
; CHECK-NEXT: [[A2:%.*]] = extractvalue [4 x i16] [[A_ARR]], 2
; CHECK-NEXT: [[A1:%.*]] = extractvalue [4 x i16] [[A_ARR]], 1
; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x i16], [4 x i16]* [[B:%.*]], align 4
; CHECK-NEXT: [[B0:%.*]] = extractvalue [4 x i16] [[B_ARR]], 0
; CHECK-NEXT: [[B2:%.*]] = extractvalue [4 x i16] [[B_ARR]], 2
; CHECK-NEXT: [[B1:%.*]] = extractvalue [4 x i16] [[B_ARR]], 1
; CHECK-NEXT: [[A3:%.*]] = extractvalue [4 x i16] [[A_ARR]], 3
; CHECK-NEXT: [[C1:%.*]] = sub i16 [[A1]], [[B1]]
; CHECK-NEXT: [[B3:%.*]] = extractvalue [4 x i16] [[B_ARR]], 3
; CHECK-NEXT: [[C0:%.*]] = sub i16 [[A0]], [[B0]]
; CHECK-NEXT: [[C2:%.*]] = sub i16 [[A2]], [[B2]]
; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x i16] undef, i16 [[C0]], 0
; CHECK-NEXT: [[C_ARR1:%.*]] = insertvalue [4 x i16] [[C_ARR0]], i16 [[C1]], 1
; CHECK-NEXT: [[C3:%.*]] = sub i16 [[A3]], [[B3]]
; CHECK-NEXT: [[C_ARR2:%.*]] = insertvalue [4 x i16] [[C_ARR1]], i16 [[C2]], 2
; CHECK-NEXT: [[C_ARR3:%.*]] = insertvalue [4 x i16] [[C_ARR2]], i16 [[C3]], 3
; CHECK-NEXT: store [4 x i16] [[C_ARR3]], [4 x i16]* [[C:%.*]], align 4
; CHECK-NEXT: ret void
;
top:
%a_arr = load [4 x i16], [4 x i16]* %a, align 4
%a0 = extractvalue [4 x i16] %a_arr, 0
@ -160,11 +262,27 @@ top:
%pseudovec = type { float, float, float, float }
; CHECK-LABEL: julia_load_struct_of_float
; CHECK: load <4 x float>
; CHECK: load <4 x float>
; CHECK: fsub <4 x float>
define void @julia_load_struct_of_float(%pseudovec* %a, %pseudovec* %b, %pseudovec* %c) {
; CHECK-LABEL: @julia_load_struct_of_float(
; CHECK-NEXT: top:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast %pseudovec* [[A:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; CHECK-NEXT: [[A_STRUCT:%.*]] = load [[PSEUDOVEC:%.*]], %pseudovec* [[A]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast %pseudovec* [[B:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
; CHECK-NEXT: [[B_STRUCT:%.*]] = load [[PSEUDOVEC]], %pseudovec* [[B]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
; CHECK-NEXT: [[C_STRUCT0:%.*]] = insertvalue [[PSEUDOVEC]] undef, float [[TMP5]], 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
; CHECK-NEXT: [[C_STRUCT1:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct0, float [[TMP6]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
; CHECK-NEXT: [[C_STRUCT2:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct1, float [[TMP7]], 2
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
; CHECK-NEXT: [[C_STRUCT3:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct2, float [[TMP8]], 3
; CHECK-NEXT: store [[PSEUDOVEC]] %c_struct3, %pseudovec* [[C:%.*]], align 4
; CHECK-NEXT: ret void
;
top:
%a_struct = load %pseudovec, %pseudovec* %a, align 4
%a0 = extractvalue %pseudovec %a_struct, 0

View File

@ -1,15 +1,46 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer < %s -S -mtriple="x86_64-grtev3-linux-gnu" -mcpu=corei7-avx | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev3-linux-gnu"
; We used to crash on this example because we were building a constant
; expression during vectorization, while the vectorizer expects
; instructions as the elements of the vectorized tree.
; CHECK-LABEL: @test
; PR19621
define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: bb279:
; CHECK-NEXT: br label [[BB283:%.*]]
; CHECK: bb283:
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ [[TMP13:%.*]], [[EXIT]] ]
; CHECK-NEXT: br label [[BB284:%.*]]
; CHECK: bb284:
; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double>
; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef
; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef
; CHECK-NEXT: br label [[BB21_I:%.*]]
; CHECK: bb21.i:
; CHECK-NEXT: br i1 undef, label [[BB22_I:%.*]], label [[EXIT]]
; CHECK: bb22.i:
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]]
; CHECK-NEXT: br label [[BB32_I:%.*]]
; CHECK: bb32.i:
; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x double> [ [[TMP5]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ]
; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]]
; CHECK: exit:
; CHECK-NEXT: [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> <double undef, double 0.000000e+00>, [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> undef, [[TMP9]]
; CHECK-NEXT: [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float>
; CHECK-NEXT: [[TMP317:%.*]] = fptrunc double undef to float
; CHECK-NEXT: [[TMP319:%.*]] = fptrunc double undef to float
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP317]], i32 0
; CHECK-NEXT: [[TMP13]] = insertelement <2 x float> [[TMP12]], float [[TMP319]], i32 1
; CHECK-NEXT: br label [[BB283]]
;
bb279:
br label %bb283
@ -62,6 +93,12 @@ exit:
; vectorizer starts at the type (%t2, %t3) and will constant fold the tree.
; The code that handles insertelement instructions must handle this.
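; (In this example the fadds fold to constants, so the would-be
; vectorized subtree is a constant vector rather than a tree of
; instructions.)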
define <4 x double> @constant_folding() {
; CHECK-LABEL: @constant_folding(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double 1.000000e+00, i32 1
; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x double> [[I1]], double 2.000000e+00, i32 0
; CHECK-NEXT: ret <4 x double> [[I2]]
;
entry:
%t0 = fadd double 1.000000e+00 , 0.000000e+00
%t1 = fadd double 1.000000e+00 , 1.000000e+00
@ -71,10 +108,3 @@ entry:
%i2 = insertelement <4 x double> %i1, double %t3, i32 0
ret <4 x double> %i2
}
; CHECK-LABEL: @constant_folding
; CHECK: %[[V0:.+]] = extractelement <2 x double> <double 1.000000e+00, double 2.000000e+00>, i32 0
; CHECK: %[[V1:.+]] = insertelement <4 x double> undef, double %[[V0]], i32 1
; CHECK: %[[V2:.+]] = extractelement <2 x double> <double 1.000000e+00, double 2.000000e+00>, i32 1
; CHECK: %[[V3:.+]] = insertelement <4 x double> %[[V1]], double %[[V2]], i32 0
; CHECK: ret <4 x double> %[[V3]]

View File

@ -0,0 +1,77 @@
; XFAIL: *
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -structurizecfg -verify-region-info %s
; FIXME: Merge into backedge-id-bug
; Variant which has an issue with region construction
define amdgpu_kernel void @loop_backedge_misidentified_alt(i32 addrspace(1)* %arg0) #0 {
entry:
%tmp = load volatile <2 x i32>, <2 x i32> addrspace(1)* undef, align 16
%load1 = load volatile <2 x float>, <2 x float> addrspace(1)* undef
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i32 %tid
%i.initial = load volatile i32, i32 addrspace(1)* %gep, align 4
br label %LOOP.HEADER
LOOP.HEADER:
%i = phi i32 [ %i.final, %END_ELSE_BLOCK ], [ %i.initial, %entry ]
call void asm sideeffect "s_nop 0x100b ; loop $0 ", "r,~{memory}"(i32 %i) #0
%tmp12 = zext i32 %i to i64
%tmp13 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 %tmp12
%tmp14 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp13, align 16
%tmp15 = extractelement <4 x i32> %tmp14, i64 0
%tmp16 = and i32 %tmp15, 65535
%tmp17 = icmp eq i32 %tmp16, 1
br i1 %tmp17, label %bb18, label %bb62
bb18:
%tmp19 = extractelement <2 x i32> %tmp, i64 0
%tmp22 = lshr i32 %tmp19, 16
%tmp24 = urem i32 %tmp22, 52
%tmp25 = mul nuw nsw i32 %tmp24, 52
br label %INNER_LOOP
INNER_LOOP:
%inner.loop.j = phi i32 [ %tmp25, %bb18 ], [ %inner.loop.j.inc, %INNER_LOOP ]
call void asm sideeffect "; inner loop body", ""() #0
%inner.loop.j.inc = add nsw i32 %inner.loop.j, 1
%inner.loop.cmp = icmp eq i32 %inner.loop.j, 0
br i1 %inner.loop.cmp, label %INNER_LOOP_BREAK, label %INNER_LOOP
INNER_LOOP_BREAK:
%tmp59 = extractelement <4 x i32> %tmp14, i64 2
call void asm sideeffect "s_nop 23 ", "~{memory}"() #0
br label %END_ELSE_BLOCK
bb62:
%load13 = icmp ult i32 %tmp16, 271
;br i1 %load13, label %bb64, label %INCREMENT_I
; Branching directly to the return avoids the bug.
br i1 %load13, label %RETURN, label %INCREMENT_I
bb64:
call void asm sideeffect "s_nop 42", "~{memory}"() #0
br label %RETURN
INCREMENT_I:
%inc.i = add i32 %i, 1
call void asm sideeffect "s_nop 0x1336 ; increment $0", "v,~{memory}"(i32 %inc.i) #0
br label %END_ELSE_BLOCK
END_ELSE_BLOCK:
%i.final = phi i32 [ %tmp59, %INNER_LOOP_BREAK ], [ %inc.i, %INCREMENT_I ]
call void asm sideeffect "s_nop 0x1337 ; end else block $0", "v,~{memory}"(i32 %i.final) #0
%cmp.end.else.block = icmp eq i32 %i.final, -1
br i1 %cmp.end.else.block, label %RETURN, label %LOOP.HEADER
RETURN:
call void asm sideeffect "s_nop 0x99 ; ClosureEval return", "~{memory}"() #0
store volatile <2 x float> %load1, <2 x float> addrspace(1)* undef, align 8
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { convergent nounwind }
attributes #1 = { convergent nounwind readnone }

View File

@ -0,0 +1,163 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -structurizecfg %s | FileCheck %s
; StructurizeCFG::orderNodes used an arbitrary and nonsensical sorting
; function which broke the basic backedge identification algorithm. It
; would use RPO order, but then do a weird partial sort by the loop
; depth, assuming blocks are grouped by loop. However, a block that is
; not part of any loop can appear in between the blocks of a loop,
; breaking that assumption.
;
; The collectInfos step must be done in RPO order. The actual
; structurization order is probably less important, but unless the
; loop headers are identified in RPO order, the pass finds the wrong
; set of back edges.
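; For example, with an RPO of [H, A, X, B] where H, A, and B belong to
; a loop and X does not, the loop's blocks are not contiguous, so a
; depth-keyed partial sort can produce an order in which a loop block
; is visited before its header and the backedge is misidentified.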
define amdgpu_kernel void @loop_backedge_misidentified(i32 addrspace(1)* %arg0) #0 {
; CHECK-LABEL: @loop_backedge_misidentified(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP:%.*]] = load volatile <2 x i32>, <2 x i32> addrspace(1)* undef, align 16
; CHECK-NEXT: [[LOAD1:%.*]] = load volatile <2 x float>, <2 x float> addrspace(1)* undef
; CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG0:%.*]], i32 [[TID]]
; CHECK-NEXT: [[I_INITIAL:%.*]] = load volatile i32, i32 addrspace(1)* [[GEP]], align 4
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: LOOP.HEADER:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[FLOW4:%.*]] ]
; CHECK-NEXT: call void asm sideeffect "s_nop 0x100b
; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[I]] to i64
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP13]], align 16
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP14]], i64 0
; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 65535
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 1
; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TMP17]], true
; CHECK-NEXT: br i1 [[TMP0]], label [[BB62:%.*]], label [[FLOW:%.*]]
; CHECK: Flow2:
; CHECK-NEXT: br label [[FLOW]]
; CHECK: bb18:
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP]], i64 0
; CHECK-NEXT: [[TMP22:%.*]] = lshr i32 [[TMP19]], 16
; CHECK-NEXT: [[TMP24:%.*]] = urem i32 [[TMP22]], 52
; CHECK-NEXT: [[TMP25:%.*]] = mul nuw nsw i32 [[TMP24]], 52
; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
; CHECK: Flow3:
; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP7:%.*]], [[FLOW]] ]
; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP8:%.*]], [[FLOW]] ]
; CHECK-NEXT: br i1 [[TMP2]], label [[END_ELSE_BLOCK:%.*]], label [[FLOW4]]
; CHECK: INNER_LOOP:
; CHECK-NEXT: [[INNER_LOOP_J:%.*]] = phi i32 [ [[INNER_LOOP_J_INC:%.*]], [[INNER_LOOP]] ], [ [[TMP25]], [[BB18:%.*]] ]
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: [[INNER_LOOP_J_INC]] = add nsw i32 [[INNER_LOOP_J]], 1
; CHECK-NEXT: [[INNER_LOOP_CMP:%.*]] = icmp eq i32 [[INNER_LOOP_J]], 0
; CHECK-NEXT: br i1 [[INNER_LOOP_CMP]], label [[INNER_LOOP_BREAK]], label [[INNER_LOOP]]
; CHECK: INNER_LOOP_BREAK:
; CHECK-NEXT: [[TMP59]] = extractelement <4 x i32> [[TMP14]], i64 2
; CHECK-NEXT: call void asm sideeffect "s_nop 23 ", "~{memory}"() #0
; CHECK-NEXT: br label [[FLOW3:%.*]]
; CHECK: bb62:
; CHECK-NEXT: [[LOAD13:%.*]] = icmp ult i32 [[TMP16]], 271
; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[LOAD13]], true
; CHECK-NEXT: br i1 [[TMP3]], label [[INCREMENT_I:%.*]], label [[FLOW1:%.*]]
; CHECK: Flow1:
; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[INC_I:%.*]], [[INCREMENT_I]] ], [ undef, [[BB62]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ true, [[INCREMENT_I]] ], [ false, [[BB62]] ]
; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ false, [[INCREMENT_I]] ], [ true, [[BB62]] ]
; CHECK-NEXT: br i1 [[TMP6]], label [[BB64:%.*]], label [[FLOW2:%.*]]
; CHECK: bb64:
; CHECK-NEXT: call void asm sideeffect "s_nop 42", "~{memory}"() #0
; CHECK-NEXT: br label [[FLOW2]]
; CHECK: Flow:
; CHECK-NEXT: [[TMP7]] = phi i32 [ [[TMP4]], [[FLOW2]] ], [ undef, [[LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP5]], [[FLOW2]] ], [ false, [[LOOP_HEADER]] ]
; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ false, [[FLOW2]] ], [ true, [[LOOP_HEADER]] ]
; CHECK-NEXT: br i1 [[TMP9]], label [[BB18]], label [[FLOW3]]
; CHECK: INCREMENT_I:
; CHECK-NEXT: [[INC_I]] = add i32 [[I]], 1
; CHECK-NEXT: call void asm sideeffect "s_nop 0x1336
; CHECK-NEXT: br label [[FLOW1]]
; CHECK: END_ELSE_BLOCK:
; CHECK-NEXT: [[I_FINAL:%.*]] = phi i32 [ [[TMP1]], [[FLOW3]] ]
; CHECK-NEXT: call void asm sideeffect "s_nop 0x1337
; CHECK-NEXT: [[CMP_END_ELSE_BLOCK:%.*]] = icmp eq i32 [[I_FINAL]], -1
; CHECK-NEXT: br label [[FLOW4]]
; CHECK: Flow4:
; CHECK-NEXT: [[TMP10]] = phi i32 [ [[I_FINAL]], [[END_ELSE_BLOCK]] ], [ undef, [[FLOW3]] ]
; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW3]] ]
; CHECK-NEXT: br i1 [[TMP11]], label [[RETURN:%.*]], label [[LOOP_HEADER]]
; CHECK: RETURN:
; CHECK-NEXT: call void asm sideeffect "s_nop 0x99
; CHECK-NEXT: store volatile <2 x float> [[LOAD1]], <2 x float> addrspace(1)* undef, align 8
; CHECK-NEXT: ret void
;
entry:
%tmp = load volatile <2 x i32>, <2 x i32> addrspace(1)* undef, align 16
%load1 = load volatile <2 x float>, <2 x float> addrspace(1)* undef
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i32 %tid
%i.initial = load volatile i32, i32 addrspace(1)* %gep, align 4
br label %LOOP.HEADER
LOOP.HEADER:
%i = phi i32 [ %i.final, %END_ELSE_BLOCK ], [ %i.initial, %entry ]
call void asm sideeffect "s_nop 0x100b ; loop $0 ", "r,~{memory}"(i32 %i) #0
%tmp12 = zext i32 %i to i64
%tmp13 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 %tmp12
%tmp14 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp13, align 16
%tmp15 = extractelement <4 x i32> %tmp14, i64 0
%tmp16 = and i32 %tmp15, 65535
%tmp17 = icmp eq i32 %tmp16, 1
br i1 %tmp17, label %bb18, label %bb62
bb18:
%tmp19 = extractelement <2 x i32> %tmp, i64 0
%tmp22 = lshr i32 %tmp19, 16
%tmp24 = urem i32 %tmp22, 52
%tmp25 = mul nuw nsw i32 %tmp24, 52
br label %INNER_LOOP
INNER_LOOP:
%inner.loop.j = phi i32 [ %tmp25, %bb18 ], [ %inner.loop.j.inc, %INNER_LOOP ]
call void asm sideeffect "; inner loop body", ""() #0
%inner.loop.j.inc = add nsw i32 %inner.loop.j, 1
%inner.loop.cmp = icmp eq i32 %inner.loop.j, 0
br i1 %inner.loop.cmp, label %INNER_LOOP_BREAK, label %INNER_LOOP
INNER_LOOP_BREAK:
%tmp59 = extractelement <4 x i32> %tmp14, i64 2
call void asm sideeffect "s_nop 23 ", "~{memory}"() #0
br label %END_ELSE_BLOCK
bb62:
%load13 = icmp ult i32 %tmp16, 271
br i1 %load13, label %bb64, label %INCREMENT_I
bb64:
call void asm sideeffect "s_nop 42", "~{memory}"() #0
br label %RETURN
INCREMENT_I:
%inc.i = add i32 %i, 1
call void asm sideeffect "s_nop 0x1336 ; increment $0", "v,~{memory}"(i32 %inc.i) #0
br label %END_ELSE_BLOCK
END_ELSE_BLOCK:
%i.final = phi i32 [ %tmp59, %INNER_LOOP_BREAK ], [ %inc.i, %INCREMENT_I ]
call void asm sideeffect "s_nop 0x1337 ; end else block $0", "v,~{memory}"(i32 %i.final) #0
%cmp.end.else.block = icmp eq i32 %i.final, -1
br i1 %cmp.end.else.block, label %RETURN, label %LOOP.HEADER
RETURN:
call void asm sideeffect "s_nop 0x99 ; ClosureEval return", "~{memory}"() #0
store volatile <2 x float> %load1, <2 x float> addrspace(1)* undef, align 8
ret void
}
; The same function, except the break to the return block goes directly
; to the return, which managed to hide the bug.
; FIXME: Merge variant from backedge-id-bug-xfail
declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { convergent nounwind }
attributes #1 = { convergent nounwind readnone }

View File

@ -0,0 +1,2 @@
if 'AMDGPU' not in config.root.targets:
config.unsupported = True

View File

@ -1,32 +1,76 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
define void @main(float addrspace(1)* %out) {
; CHECK: main_body:
; CHECK: br label %LOOP.outer
; CHECK-LABEL: @main(
; CHECK-NEXT: main_body:
; CHECK-NEXT: br label [[LOOP_OUTER:%.*]]
; CHECK: LOOP.outer:
; CHECK-NEXT: [[TEMP8_0_PH:%.*]] = phi float [ 0.000000e+00, [[MAIN_BODY:%.*]] ], [ [[TMP13:%.*]], [[FLOW3:%.*]] ]
; CHECK-NEXT: [[TEMP4_0_PH:%.*]] = phi i32 [ 0, [[MAIN_BODY]] ], [ [[TMP12:%.*]], [[FLOW3]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: LOOP:
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ undef, [[LOOP_OUTER]] ], [ [[TMP12]], [[FLOW:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi float [ undef, [[LOOP_OUTER]] ], [ [[TMP13]], [[FLOW]] ]
; CHECK-NEXT: [[TEMP4_0:%.*]] = phi i32 [ [[TEMP4_0_PH]], [[LOOP_OUTER]] ], [ [[TMP15:%.*]], [[FLOW]] ]
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TEMP4_0]], 1
; CHECK-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], 3
; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[TMP22]], true
; CHECK-NEXT: br i1 [[TMP2]], label [[ENDIF:%.*]], label [[FLOW]]
; CHECK: Flow2:
; CHECK-NEXT: [[TMP3:%.*]] = phi float [ [[TEMP8_0_PH]], [[IF29:%.*]] ], [ [[TMP9:%.*]], [[FLOW1:%.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP20]], [[IF29]] ], [ undef, [[FLOW1]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[TMP32:%.*]], [[IF29]] ], [ true, [[FLOW1]] ]
; CHECK-NEXT: br label [[FLOW]]
; CHECK: Flow3:
; CHECK-NEXT: br i1 [[TMP16:%.*]], label [[ENDLOOP:%.*]], label [[LOOP_OUTER]]
; CHECK: ENDLOOP:
; CHECK-NEXT: [[TEMP8_1:%.*]] = phi float [ [[TMP14:%.*]], [[FLOW3]] ]
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[TMP20]], 3
; CHECK-NEXT: [[DOT45:%.*]] = select i1 [[TMP23]], float 0.000000e+00, float 1.000000e+00
; CHECK-NEXT: store float [[DOT45]], float addrspace(1)* [[OUT:%.*]]
; CHECK-NEXT: ret void
; CHECK: ENDIF:
; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP20]], 1
; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[TMP31]], true
; CHECK-NEXT: br i1 [[TMP6]], label [[ENDIF28:%.*]], label [[FLOW1]]
; CHECK: Flow1:
; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP20]], [[ENDIF28]] ], [ [[TMP0]], [[ENDIF]] ]
; CHECK-NEXT: [[TMP8:%.*]] = phi float [ [[TMP35:%.*]], [[ENDIF28]] ], [ [[TMP1]], [[ENDIF]] ]
; CHECK-NEXT: [[TMP9]] = phi float [ [[TMP35]], [[ENDIF28]] ], [ [[TEMP8_0_PH]], [[ENDIF]] ]
; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ [[TMP36:%.*]], [[ENDIF28]] ], [ true, [[ENDIF]] ]
; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ false, [[ENDIF28]] ], [ true, [[ENDIF]] ]
; CHECK-NEXT: br i1 [[TMP11]], label [[IF29]], label [[FLOW2:%.*]]
; CHECK: IF29:
; CHECK-NEXT: [[TMP32]] = icmp sgt i32 [[TMP20]], 2
; CHECK-NEXT: br label [[FLOW2]]
; CHECK: Flow:
; CHECK-NEXT: [[TMP12]] = phi i32 [ [[TMP7]], [[FLOW2]] ], [ [[TMP0]], [[LOOP]] ]
; CHECK-NEXT: [[TMP13]] = phi float [ [[TMP8]], [[FLOW2]] ], [ [[TMP1]], [[LOOP]] ]
; CHECK-NEXT: [[TMP14]] = phi float [ [[TMP3]], [[FLOW2]] ], [ [[TEMP8_0_PH]], [[LOOP]] ]
; CHECK-NEXT: [[TMP15]] = phi i32 [ [[TMP4]], [[FLOW2]] ], [ undef, [[LOOP]] ]
; CHECK-NEXT: [[TMP16]] = phi i1 [ [[TMP10]], [[FLOW2]] ], [ true, [[LOOP]] ]
; CHECK-NEXT: [[TMP17:%.*]] = phi i1 [ [[TMP5]], [[FLOW2]] ], [ true, [[LOOP]] ]
; CHECK-NEXT: br i1 [[TMP17]], label [[FLOW3]], label [[LOOP]]
; CHECK: ENDIF28:
; CHECK-NEXT: [[TMP35]] = fadd float [[TEMP8_0_PH]], 1.000000e+00
; CHECK-NEXT: [[TMP36]] = icmp sgt i32 [[TMP20]], 2
; CHECK-NEXT: br label [[FLOW1]]
;
main_body:
br label %LOOP.outer
; CHECK: LOOP.outer:
; CHECK: br label %LOOP
LOOP.outer: ; preds = %ENDIF28, %main_body
%temp8.0.ph = phi float [ 0.000000e+00, %main_body ], [ %tmp35, %ENDIF28 ]
%temp4.0.ph = phi i32 [ 0, %main_body ], [ %tmp20, %ENDIF28 ]
br label %LOOP
; CHECK: LOOP:
; br i1 %{{[0-9]+}}, label %ENDIF, label %Flow
LOOP: ; preds = %IF29, %LOOP.outer
%temp4.0 = phi i32 [ %temp4.0.ph, %LOOP.outer ], [ %tmp20, %IF29 ]
%tmp20 = add i32 %temp4.0, 1
%tmp22 = icmp sgt i32 %tmp20, 3
br i1 %tmp22, label %ENDLOOP, label %ENDIF
; CHECK: Flow3
; CHECK: br i1 %{{[0-9]+}}, label %ENDLOOP, label %LOOP.outer
; CHECK: ENDLOOP:
; CHECK: ret void
ENDLOOP: ; preds = %ENDIF28, %IF29, %LOOP
%temp8.1 = phi float [ %temp8.0.ph, %LOOP ], [ %temp8.0.ph, %IF29 ], [ %tmp35, %ENDIF28 ]
%tmp23 = icmp eq i32 %tmp20, 3
@ -34,29 +78,14 @@ ENDLOOP: ; preds = %ENDIF28, %IF29, %LO
store float %.45, float addrspace(1)* %out
ret void
; CHECK: ENDIF:
; CHECK: br i1 %tmp31, label %IF29, label %Flow1
ENDIF: ; preds = %LOOP
%tmp31 = icmp sgt i32 %tmp20, 1
br i1 %tmp31, label %IF29, label %ENDIF28
; CHECK: Flow:
; CHECK: br i1 %{{[0-9]+}}, label %Flow2, label %LOOP
; CHECK: IF29:
; CHECK: br label %Flow1
IF29: ; preds = %ENDIF
%tmp32 = icmp sgt i32 %tmp20, 2
br i1 %tmp32, label %ENDLOOP, label %LOOP
; CHECK: Flow1:
; CHECK: br label %Flow
; CHECK: Flow2:
; CHECK: br i1 %{{[0-9]+}}, label %ENDIF28, label %Flow3
; CHECK: ENDIF28:
; CHECK: br label %Flow3
ENDIF28: ; preds = %ENDIF
%tmp35 = fadd float %temp8.0.ph, 1.0
%tmp36 = icmp sgt i32 %tmp20, 2

View File

@ -0,0 +1,26 @@
# RUN: yaml2obj %s -o %t.o
# RUN: llvm-readobj -needed-libs %t.o | FileCheck %s
# CHECK: NeededLibraries [
# CHECK-NEXT: /usr/lib/libSystem.B.dylib
# CHECK-NEXT: ]
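# The dylib's install name lives at offset 24 from the start of the
# LC_LOAD_DYLIB command (sizeof(dylib_command) == 24); yaml2obj places
# the PayloadString there, giving a cmdsize of 56 after padding.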
!mach-o
FileHeader:
magic: 0xFEEDFACF
cputype: 0x01000007
cpusubtype: 0x00000003
filetype: 0x00000001
ncmds: 1
sizeofcmds: 56
flags: 0x00002000
reserved: 0x00000000
LoadCommands:
- cmd: LC_LOAD_DYLIB
cmdsize: 56
dylib:
name: 24
timestamp: 2
current_version: 81985536
compatibility_version: 65536
PayloadString: /usr/lib/libSystem.B.dylib

View File

@ -39,6 +39,8 @@ class MachODumper : public ObjDumper {
void printUnwindInfo() override;
void printStackMap() const override;
void printNeededLibraries() override;
// MachO-specific.
void printMachODataInCode() override;
void printMachOVersionMin() override;
@ -675,6 +677,34 @@ void MachODumper::printStackMap() const {
StackMapV2Parser<support::big>(StackMapContentsArray));
}
void MachODumper::printNeededLibraries() {
ListScope D(W, "NeededLibraries");
using LibsTy = std::vector<StringRef>;
LibsTy Libs;
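// Collect the install name from every load command that references a
// dylib; dylib.name is an offset from the start of the command to the
// name string.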
for (const auto &Command : Obj->load_commands()) {
if (Command.C.cmd == MachO::LC_LOAD_DYLIB ||
Command.C.cmd == MachO::LC_ID_DYLIB ||
Command.C.cmd == MachO::LC_LOAD_WEAK_DYLIB ||
Command.C.cmd == MachO::LC_REEXPORT_DYLIB ||
Command.C.cmd == MachO::LC_LAZY_LOAD_DYLIB ||
Command.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB) {
MachO::dylib_command Dl = Obj->getDylibIDLoadCommand(Command);
if (Dl.dylib.name < Dl.cmdsize) {
auto *P = static_cast<const char*>(Command.Ptr) + Dl.dylib.name;
Libs.push_back(P);
}
}
}
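// Sort the names for deterministic output; stable_sort keeps
// duplicates in their original load-command order.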
std::stable_sort(Libs.begin(), Libs.end());
for (const auto &L : Libs) {
outs() << " " << L << "\n";
}
}
void MachODumper::printMachODataInCode() {
for (const auto &Load : Obj->load_commands()) {
if (Load.C.cmd == MachO::LC_DATA_IN_CODE) {

View File

@ -258,3 +258,98 @@ TEST(DominatorTreeBatchUpdates, InsertDeleteExhaustive) {
EXPECT_TRUE(PDT.verify());
}
}
// These are some odd flowgraphs, usually generated from csmith test
// cases, which are difficult for postdominator trees.
TEST(DominatorTreeBatchUpdates, InfiniteLoop) {
std::vector<CFGBuilder::Arc> Arcs = {
{"1", "2"},
{"2", "3"},
{"3", "6"}, {"3", "5"},
{"4", "5"},
{"5", "2"},
{"6", "3"}, {"6", "4"}};
// SplitBlock on 3 -> 5
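// i.e., route the edge through a new block N: insert N -> 5 and
// 3 -> N, then delete the original 3 -> 5 edge.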
std::vector<CFGBuilder::Update> Updates = {
{CFGInsert, {"N", "5"}}, {CFGInsert, {"3", "N"}}, {CFGDelete, {"3", "5"}}};
CFGHolder Holder;
CFGBuilder B(Holder.F, Arcs, Updates);
DominatorTree DT(*Holder.F);
EXPECT_TRUE(DT.verify());
PostDomTree PDT(*Holder.F);
EXPECT_TRUE(PDT.verify());
while (B.applyUpdate())
;
auto DomUpdates = ToDomUpdates(B, Updates);
DT.applyUpdates(DomUpdates);
EXPECT_TRUE(DT.verify());
PDT.applyUpdates(DomUpdates);
EXPECT_TRUE(PDT.verify());
}
TEST(DominatorTreeBatchUpdates, DeadBlocks) {
std::vector<CFGBuilder::Arc> Arcs = {
{"1", "2"},
{"2", "3"},
{"3", "4"}, {"3", "7"},
{"4", "4"},
{"5", "6"}, {"5", "7"},
{"6", "7"},
{"7", "2"}, {"7", "8"}};
// Remove the dead blocks 5 and 6,
// plus SplitBlock on 7 -> 8
std::vector<CFGBuilder::Update> Updates = {
{CFGDelete, {"6", "7"}}, {CFGDelete, {"5", "7"}}, {CFGDelete, {"5", "6"}},
{CFGInsert, {"N", "8"}}, {CFGInsert, {"7", "N"}}, {CFGDelete, {"7", "8"}}};
CFGHolder Holder;
CFGBuilder B(Holder.F, Arcs, Updates);
DominatorTree DT(*Holder.F);
EXPECT_TRUE(DT.verify());
PostDomTree PDT(*Holder.F);
EXPECT_TRUE(PDT.verify());
while (B.applyUpdate())
;
auto DomUpdates = ToDomUpdates(B, Updates);
DT.applyUpdates(DomUpdates);
EXPECT_TRUE(DT.verify());
PDT.applyUpdates(DomUpdates);
EXPECT_TRUE(PDT.verify());
}
TEST(DominatorTreeBatchUpdates, InfiniteLoop2) {
std::vector<CFGBuilder::Arc> Arcs = {
{"1", "2"},
{"2", "6"}, {"2", "3"},
{"3", "4"},
{"4", "5"}, {"4", "6"},
{"5", "4"},
{"6", "2"}};
// SplitBlock on 4 -> 6
std::vector<CFGBuilder::Update> Updates = {
{CFGInsert, {"N", "6"}}, {CFGInsert, {"4", "N"}}, {CFGDelete, {"4", "6"}}};
CFGHolder Holder;
CFGBuilder B(Holder.F, Arcs, Updates);
DominatorTree DT(*Holder.F);
EXPECT_TRUE(DT.verify());
PostDomTree PDT(*Holder.F);
EXPECT_TRUE(PDT.verify());
while (B.applyUpdate())
;
auto DomUpdates = ToDomUpdates(B, Updates);
DT.applyUpdates(DomUpdates);
EXPECT_TRUE(DT.verify());
PDT.applyUpdates(DomUpdates);
EXPECT_TRUE(PDT.verify());
}

View File

@ -925,3 +925,28 @@ TEST(DominatorTree, InsertDeleteExhaustive) {
}
}
}
TEST(DominatorTree, InsertIntoIrreducible) {
std::vector<CFGBuilder::Arc> Arcs = {
{"0", "1"},
{"1", "27"}, {"1", "7"},
{"10", "18"},
{"13", "10"},
{"18", "13"}, {"18", "23"},
{"23", "13"}, {"23", "24"},
{"24", "1"}, {"24", "18"},
{"27", "24"}};
CFGHolder Holder;
CFGBuilder B(Holder.F, Arcs, {{Insert, {"7", "23"}}});
DominatorTree DT(*Holder.F);
EXPECT_TRUE(DT.verify());
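// Materialize the queued 7 -> 23 insertion in the IR, then mirror it
// in the dominator tree with an incremental update.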
B.applyUpdate();
BasicBlock *From = B.getOrAddBlock("7");
BasicBlock *To = B.getOrAddBlock("23");
DT.insertEdge(From, To);
EXPECT_TRUE(DT.verify());
}

View File

@ -33,6 +33,7 @@ do_asserts="no"
do_compare="yes"
do_rt="yes"
do_libs="yes"
do_libcxxabi="yes"
do_libunwind="yes"
do_test_suite="yes"
do_openmp="yes"
@ -62,6 +63,7 @@ function usage() {
echo " For example -svn-path trunk or -svn-path branches/release_37"
echo " -no-rt Disable check-out & build Compiler-RT"
echo " -no-libs Disable check-out & build libcxx/libcxxabi/libunwind"
echo " -no-libcxxabi Disable check-out & build libcxxabi"
echo " -no-libunwind Disable check-out & build libunwind"
echo " -no-test-suite Disable check-out & build test-suite"
echo " -no-openmp Disable check-out & build libomp"
@ -135,6 +137,9 @@ while [ $# -gt 0 ]; do
-no-libs )
do_libs="no"
;;
-no-libcxxabi )
do_libcxxabi="no"
;;
-no-libunwind )
do_libunwind="no"
;;
@ -206,7 +211,10 @@ if [ $do_rt = "yes" ]; then
projects="$projects compiler-rt"
fi
if [ $do_libs = "yes" ]; then
projects="$projects libcxx libcxxabi"
projects="$projects libcxx"
if [ $do_libcxxabi = "yes" ]; then
projects="$projects libcxxabi"
fi
if [ $do_libunwind = "yes" ]; then
projects="$projects libunwind"
fi
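# Example (hypothetical invocation): build a release candidate without
# libcxxabi and libunwind:
#   ./test-release.sh -release 6.0.0 -rc 1 -no-libcxxabi -no-libunwind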