Vendor import of llvm release_60 branch r323338:
https://llvm.org/svn/llvm-project/llvm/branches/release_60@323338
This commit is contained in:
parent
d215fd3b74
commit
a096e0bdf6
@ -37,6 +37,8 @@ set(LLVM_ENABLE_THREADS @LLVM_ENABLE_THREADS@)
|
||||
|
||||
set(LLVM_ENABLE_ZLIB @LLVM_ENABLE_ZLIB@)
|
||||
|
||||
set(LLVM_LIBXML2_ENABLED @LLVM_LIBXML2_ENABLED@)
|
||||
|
||||
set(LLVM_ENABLE_DIA_SDK @LLVM_ENABLE_DIA_SDK@)
|
||||
|
||||
set(LLVM_NATIVE_ARCH @LLVM_NATIVE_ARCH@)
|
||||
|
@ -54,6 +54,8 @@ Non-comprehensive list of changes in this release
|
||||
``DIVariables`` to the instructions in a ``Module``. The ``CheckDebugify``
|
||||
pass determines how much of the metadata is lost.
|
||||
|
||||
* Significantly improved quality of CodeView debug info for Windows.
|
||||
|
||||
* Note..
|
||||
|
||||
.. NOTE
|
||||
@ -69,10 +71,13 @@ Non-comprehensive list of changes in this release
|
||||
Changes to the LLVM IR
|
||||
----------------------
|
||||
|
||||
Changes to the ARM Backend
|
||||
--------------------------
|
||||
Changes to the ARM Target
|
||||
-------------------------
|
||||
|
||||
During this release ...
|
||||
During this release the ARM target has:
|
||||
|
||||
* Got support for enabling SjLj exception handling on platforms where it
|
||||
isn't the default.
|
||||
|
||||
|
||||
Changes to the MIPS Target
|
||||
@ -89,7 +94,10 @@ Changes to the PowerPC Target
|
||||
Changes to the X86 Target
|
||||
-------------------------
|
||||
|
||||
During this release ...
|
||||
During this release the X86 target has:
|
||||
|
||||
* Got support for enabling SjLj exception handling on platforms where it
|
||||
isn't the default.
|
||||
|
||||
Changes to the AMDGPU Target
|
||||
-----------------------------
|
||||
@ -116,8 +124,46 @@ Changes to the C API
|
||||
External Open Source Projects Using LLVM 6
|
||||
==========================================
|
||||
|
||||
* A project...
|
||||
JFS - JIT Fuzzing Solver
|
||||
------------------------
|
||||
|
||||
`JFS <https://github.com/delcypher/jfs>`_ is an experimental constraint solver
|
||||
designed to investigate using coverage guided fuzzing as an incomplete strategy
|
||||
for solving boolean, BitVector, and floating-point constraints.
|
||||
It is built on top of LLVM, Clang, LibFuzzer, and Z3.
|
||||
|
||||
The solver works by generating a C++ program where the reachability of an
|
||||
`abort()` statement is equivalent to finding a satisfying assignment to the
|
||||
constraints. This program is then compiled by Clang with `SanitizerCoverage
|
||||
<https://releases.llvm.org/6.0.0/tools/clang/docs/SanitizerCoverage.html>`_
|
||||
instrumentation and then fuzzed using :doc:`LibFuzzer <LibFuzzer>`.
|
||||
|
||||
Zig Programming Language
|
||||
------------------------
|
||||
|
||||
`Zig <http://ziglang.org>`_ is an open-source programming language designed
|
||||
for robustness, optimality, and clarity. It is intended to replace C. It
|
||||
provides high level features such as Generics,
|
||||
Compile Time Function Execution, and Partial Evaluation, yet exposes low level
|
||||
LLVM IR features such as Aliases. Zig uses Clang to provide automatic
|
||||
import of .h symbols - even inline functions and macros. Zig uses LLD combined
|
||||
with lazily building compiler-rt to provide out-of-the-box cross-compiling for
|
||||
all supported targets.
|
||||
|
||||
LDC - the LLVM-based D compiler
|
||||
-------------------------------
|
||||
|
||||
`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It
|
||||
pragmatically combines efficiency, control, and modeling power, with safety and
|
||||
programmer productivity. D supports powerful concepts like Compile-Time Function
|
||||
Execution (CTFE) and Template Meta-Programming, provides an innovative approach
|
||||
to concurrency and offers many classical paradigms.
|
||||
|
||||
`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler
|
||||
combined with LLVM as backend to produce efficient native code. LDC targets
|
||||
x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on ARM
|
||||
and PowerPC (32/64 bit). Ports to other architectures like AArch64 and MIPS64
|
||||
are underway.
|
||||
|
||||
Additional Information
|
||||
======================
|
||||
|
@ -254,23 +254,23 @@ std::string RegionBase<Tr>::getNameStr() const {
|
||||
template <class Tr>
|
||||
void RegionBase<Tr>::verifyBBInRegion(BlockT *BB) const {
|
||||
if (!contains(BB))
|
||||
llvm_unreachable("Broken region found: enumerated BB not in region!");
|
||||
report_fatal_error("Broken region found: enumerated BB not in region!");
|
||||
|
||||
BlockT *entry = getEntry(), *exit = getExit();
|
||||
|
||||
for (BlockT *Succ :
|
||||
make_range(BlockTraits::child_begin(BB), BlockTraits::child_end(BB))) {
|
||||
if (!contains(Succ) && exit != Succ)
|
||||
llvm_unreachable("Broken region found: edges leaving the region must go "
|
||||
"to the exit node!");
|
||||
report_fatal_error("Broken region found: edges leaving the region must go "
|
||||
"to the exit node!");
|
||||
}
|
||||
|
||||
if (entry != BB) {
|
||||
for (BlockT *Pred : make_range(InvBlockTraits::child_begin(BB),
|
||||
InvBlockTraits::child_end(BB))) {
|
||||
if (!contains(Pred))
|
||||
llvm_unreachable("Broken region found: edges entering the region must "
|
||||
"go to the entry node!");
|
||||
report_fatal_error("Broken region found: edges entering the region must "
|
||||
"go to the entry node!");
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -557,7 +557,7 @@ void RegionInfoBase<Tr>::verifyBBMap(const RegionT *R) const {
|
||||
} else {
|
||||
BlockT *BB = Element->template getNodeAs<BlockT>();
|
||||
if (getRegionFor(BB) != R)
|
||||
llvm_unreachable("BB map does not match region nesting");
|
||||
report_fatal_error("BB map does not match region nesting");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -56,7 +56,7 @@ public:
|
||||
int64_t &Off);
|
||||
|
||||
/// Parses tree in Ptr for base, index, offset addresses.
|
||||
static BaseIndexOffset match(SDValue Ptr, const SelectionDAG &DAG);
|
||||
static BaseIndexOffset match(LSBaseSDNode *N, const SelectionDAG &DAG);
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
@ -177,13 +177,7 @@ public:
|
||||
unsigned IACol);
|
||||
|
||||
/// Retrieve the function info if this is a valid function id, or nullptr.
|
||||
MCCVFunctionInfo *getCVFunctionInfo(unsigned FuncId) {
|
||||
if (FuncId >= Functions.size())
|
||||
return nullptr;
|
||||
if (Functions[FuncId].isUnallocatedFunctionInfo())
|
||||
return nullptr;
|
||||
return &Functions[FuncId];
|
||||
}
|
||||
MCCVFunctionInfo *getCVFunctionInfo(unsigned FuncId);
|
||||
|
||||
/// Saves the information from the currently parsed .cv_loc directive
|
||||
/// and sets CVLocSeen. When the next instruction is assembled an entry
|
||||
@ -199,50 +193,22 @@ public:
|
||||
CurrentCVLoc.setIsStmt(IsStmt);
|
||||
CVLocSeen = true;
|
||||
}
|
||||
void clearCVLocSeen() { CVLocSeen = false; }
|
||||
|
||||
bool getCVLocSeen() { return CVLocSeen; }
|
||||
void clearCVLocSeen() { CVLocSeen = false; }
|
||||
|
||||
const MCCVLoc &getCurrentCVLoc() { return CurrentCVLoc; }
|
||||
|
||||
bool isValidCVFileNumber(unsigned FileNumber);
|
||||
|
||||
/// \brief Add a line entry.
|
||||
void addLineEntry(const MCCVLineEntry &LineEntry) {
|
||||
size_t Offset = MCCVLines.size();
|
||||
auto I = MCCVLineStartStop.insert(
|
||||
{LineEntry.getFunctionId(), {Offset, Offset + 1}});
|
||||
if (!I.second)
|
||||
I.first->second.second = Offset + 1;
|
||||
MCCVLines.push_back(LineEntry);
|
||||
}
|
||||
void addLineEntry(const MCCVLineEntry &LineEntry);
|
||||
|
||||
std::vector<MCCVLineEntry> getFunctionLineEntries(unsigned FuncId) {
|
||||
std::vector<MCCVLineEntry> FilteredLines;
|
||||
std::vector<MCCVLineEntry> getFunctionLineEntries(unsigned FuncId);
|
||||
|
||||
auto I = MCCVLineStartStop.find(FuncId);
|
||||
if (I != MCCVLineStartStop.end())
|
||||
for (size_t Idx = I->second.first, End = I->second.second; Idx != End;
|
||||
++Idx)
|
||||
if (MCCVLines[Idx].getFunctionId() == FuncId)
|
||||
FilteredLines.push_back(MCCVLines[Idx]);
|
||||
return FilteredLines;
|
||||
}
|
||||
std::pair<size_t, size_t> getLineExtent(unsigned FuncId);
|
||||
|
||||
std::pair<size_t, size_t> getLineExtent(unsigned FuncId) {
|
||||
auto I = MCCVLineStartStop.find(FuncId);
|
||||
// Return an empty extent if there are no cv_locs for this function id.
|
||||
if (I == MCCVLineStartStop.end())
|
||||
return {~0ULL, 0};
|
||||
return I->second;
|
||||
}
|
||||
|
||||
ArrayRef<MCCVLineEntry> getLinesForExtent(size_t L, size_t R) {
|
||||
if (R <= L)
|
||||
return None;
|
||||
if (L >= MCCVLines.size())
|
||||
return None;
|
||||
return makeArrayRef(&MCCVLines[L], R - L);
|
||||
}
|
||||
ArrayRef<MCCVLineEntry> getLinesForExtent(size_t L, size_t R);
|
||||
|
||||
/// Emits a line table substream.
|
||||
void emitLineTableForFunction(MCObjectStreamer &OS, unsigned FuncId,
|
||||
|
@ -628,7 +628,7 @@ struct SemiNCAInfo {
|
||||
DecreasingLevel>
|
||||
Bucket; // Queue of tree nodes sorted by level in descending order.
|
||||
SmallDenseSet<TreeNodePtr, 8> Affected;
|
||||
SmallDenseSet<TreeNodePtr, 8> Visited;
|
||||
SmallDenseMap<TreeNodePtr, unsigned, 8> Visited;
|
||||
SmallVector<TreeNodePtr, 8> AffectedQueue;
|
||||
SmallVector<TreeNodePtr, 8> VisitedNotAffectedQueue;
|
||||
};
|
||||
@ -706,7 +706,7 @@ struct SemiNCAInfo {
|
||||
// algorithm does not really know or use the set of roots and can make a
|
||||
// different (implicit) decision about which nodes within an infinite loop
|
||||
// become a root.
|
||||
if (DT.isVirtualRoot(TN->getIDom())) {
|
||||
if (TN && !DT.isVirtualRoot(TN->getIDom())) {
|
||||
DEBUG(dbgs() << "Root " << BlockNamePrinter(R)
|
||||
<< " is not virtual root's child\n"
|
||||
<< "The entire tree needs to be rebuilt\n");
|
||||
@ -753,14 +753,16 @@ struct SemiNCAInfo {
|
||||
|
||||
while (!II.Bucket.empty()) {
|
||||
const TreeNodePtr CurrentNode = II.Bucket.top().second;
|
||||
const unsigned CurrentLevel = CurrentNode->getLevel();
|
||||
II.Bucket.pop();
|
||||
DEBUG(dbgs() << "\tAdding to Visited and AffectedQueue: "
|
||||
<< BlockNamePrinter(CurrentNode) << "\n");
|
||||
II.Visited.insert(CurrentNode);
|
||||
|
||||
II.Visited.insert({CurrentNode, CurrentLevel});
|
||||
II.AffectedQueue.push_back(CurrentNode);
|
||||
|
||||
// Discover and collect affected successors of the current node.
|
||||
VisitInsertion(DT, BUI, CurrentNode, CurrentNode->getLevel(), NCD, II);
|
||||
VisitInsertion(DT, BUI, CurrentNode, CurrentLevel, NCD, II);
|
||||
}
|
||||
|
||||
// Finish by updating immediate dominators and levels.
|
||||
@ -772,13 +774,17 @@ struct SemiNCAInfo {
|
||||
const TreeNodePtr TN, const unsigned RootLevel,
|
||||
const TreeNodePtr NCD, InsertionInfo &II) {
|
||||
const unsigned NCDLevel = NCD->getLevel();
|
||||
DEBUG(dbgs() << "Visiting " << BlockNamePrinter(TN) << "\n");
|
||||
DEBUG(dbgs() << "Visiting " << BlockNamePrinter(TN) << ", RootLevel "
|
||||
<< RootLevel << "\n");
|
||||
|
||||
SmallVector<TreeNodePtr, 8> Stack = {TN};
|
||||
assert(TN->getBlock() && II.Visited.count(TN) && "Preconditions!");
|
||||
|
||||
SmallPtrSet<TreeNodePtr, 8> Processed;
|
||||
|
||||
do {
|
||||
TreeNodePtr Next = Stack.pop_back_val();
|
||||
DEBUG(dbgs() << " Next: " << BlockNamePrinter(Next) << "\n");
|
||||
|
||||
for (const NodePtr Succ :
|
||||
ChildrenGetter<IsPostDom>::Get(Next->getBlock(), BUI)) {
|
||||
@ -786,19 +792,31 @@ struct SemiNCAInfo {
|
||||
assert(SuccTN && "Unreachable successor found at reachable insertion");
|
||||
const unsigned SuccLevel = SuccTN->getLevel();
|
||||
|
||||
DEBUG(dbgs() << "\tSuccessor " << BlockNamePrinter(Succ)
|
||||
<< ", level = " << SuccLevel << "\n");
|
||||
DEBUG(dbgs() << "\tSuccessor " << BlockNamePrinter(Succ) << ", level = "
|
||||
<< SuccLevel << "\n");
|
||||
|
||||
// Do not process the same node multiple times.
|
||||
if (Processed.count(Next) > 0)
|
||||
continue;
|
||||
|
||||
// Succ dominated by subtree From -- not affected.
|
||||
// (Based on the lemma 2.5 from the second paper.)
|
||||
if (SuccLevel > RootLevel) {
|
||||
DEBUG(dbgs() << "\t\tDominated by subtree From\n");
|
||||
if (II.Visited.count(SuccTN) != 0)
|
||||
continue;
|
||||
if (II.Visited.count(SuccTN) != 0) {
|
||||
DEBUG(dbgs() << "\t\t\talready visited at level "
|
||||
<< II.Visited[SuccTN] << "\n\t\t\tcurrent level "
|
||||
<< RootLevel << ")\n");
|
||||
|
||||
// A node can be necessary to visit again if we see it again at
|
||||
// a lower level than before.
|
||||
if (II.Visited[SuccTN] >= RootLevel)
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << "\t\tMarking visited not affected "
|
||||
<< BlockNamePrinter(Succ) << "\n");
|
||||
II.Visited.insert(SuccTN);
|
||||
II.Visited.insert({SuccTN, RootLevel});
|
||||
II.VisitedNotAffectedQueue.push_back(SuccTN);
|
||||
Stack.push_back(SuccTN);
|
||||
} else if ((SuccLevel > NCDLevel + 1) &&
|
||||
@ -809,6 +827,8 @@ struct SemiNCAInfo {
|
||||
II.Bucket.push({SuccLevel, SuccTN});
|
||||
}
|
||||
}
|
||||
|
||||
Processed.insert(Next);
|
||||
} while (!Stack.empty());
|
||||
}
|
||||
|
||||
@ -920,21 +940,21 @@ struct SemiNCAInfo {
|
||||
const NodePtr NCDBlock = DT.findNearestCommonDominator(From, To);
|
||||
const TreeNodePtr NCD = DT.getNode(NCDBlock);
|
||||
|
||||
// To dominates From -- nothing to do.
|
||||
if (ToTN == NCD) return;
|
||||
// If To dominates From -- nothing to do.
|
||||
if (ToTN != NCD) {
|
||||
DT.DFSInfoValid = false;
|
||||
|
||||
DT.DFSInfoValid = false;
|
||||
const TreeNodePtr ToIDom = ToTN->getIDom();
|
||||
DEBUG(dbgs() << "\tNCD " << BlockNamePrinter(NCD) << ", ToIDom "
|
||||
<< BlockNamePrinter(ToIDom) << "\n");
|
||||
|
||||
const TreeNodePtr ToIDom = ToTN->getIDom();
|
||||
DEBUG(dbgs() << "\tNCD " << BlockNamePrinter(NCD) << ", ToIDom "
|
||||
<< BlockNamePrinter(ToIDom) << "\n");
|
||||
|
||||
// To remains reachable after deletion.
|
||||
// (Based on the caption under Figure 4. from the second paper.)
|
||||
if (FromTN != ToIDom || HasProperSupport(DT, BUI, ToTN))
|
||||
DeleteReachable(DT, BUI, FromTN, ToTN);
|
||||
else
|
||||
DeleteUnreachable(DT, BUI, ToTN);
|
||||
// To remains reachable after deletion.
|
||||
// (Based on the caption under Figure 4. from the second paper.)
|
||||
if (FromTN != ToIDom || HasProperSupport(DT, BUI, ToTN))
|
||||
DeleteReachable(DT, BUI, FromTN, ToTN);
|
||||
else
|
||||
DeleteUnreachable(DT, BUI, ToTN);
|
||||
}
|
||||
|
||||
if (IsPostDom) UpdateRootsAfterUpdate(DT, BUI);
|
||||
}
|
||||
|
@ -95,14 +95,9 @@ private:
|
||||
bool tryToVectorizePair(Value *A, Value *B, slpvectorizer::BoUpSLP &R);
|
||||
|
||||
/// \brief Try to vectorize a list of operands.
|
||||
/// \@param BuildVector A list of users to ignore for the purpose of
|
||||
/// scheduling and cost estimation when NeedExtraction
|
||||
/// is false.
|
||||
/// \returns true if a value was vectorized.
|
||||
bool tryToVectorizeList(ArrayRef<Value *> VL, slpvectorizer::BoUpSLP &R,
|
||||
ArrayRef<Value *> BuildVector = None,
|
||||
bool AllowReorder = false,
|
||||
bool NeedExtraction = false);
|
||||
bool AllowReorder = false);
|
||||
|
||||
/// \brief Try to vectorize a chain that may start at the operands of \p I.
|
||||
bool tryToVectorize(Instruction *I, slpvectorizer::BoUpSLP &R);
|
||||
|
@ -2700,8 +2700,13 @@ public:
|
||||
// we still need to collect it because the original value is different.
|
||||
// And later we will need all original values as anchors during
|
||||
// finding the common Phi node.
|
||||
// We also must reject the case when base offset is different and
|
||||
// scale reg is not null, we cannot handle this case due to merge of
|
||||
// different offsets will be used as ScaleReg.
|
||||
if (DifferentField != ExtAddrMode::MultipleFields &&
|
||||
DifferentField != ExtAddrMode::ScaleField) {
|
||||
DifferentField != ExtAddrMode::ScaleField &&
|
||||
(DifferentField != ExtAddrMode::BaseOffsField ||
|
||||
!NewAddrMode.ScaledReg)) {
|
||||
AddrModes.emplace_back(NewAddrMode);
|
||||
return true;
|
||||
}
|
||||
|
@ -577,7 +577,8 @@ bool GlobalMerge::doInitialization(Module &M) {
|
||||
for (auto &GV : M.globals()) {
|
||||
// Merge is safe for "normal" internal or external globals only
|
||||
if (GV.isDeclaration() || GV.isThreadLocal() ||
|
||||
GV.hasSection() || GV.hasImplicitSection())
|
||||
GV.hasSection() || GV.hasImplicitSection() ||
|
||||
GV.hasDLLExportStorageClass())
|
||||
continue;
|
||||
|
||||
// It's not safe to merge globals that may be preempted
|
||||
|
@ -719,15 +719,14 @@ bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg,
|
||||
CurSrcPair = Pair;
|
||||
ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI,
|
||||
!DisableAdvCopyOpt, TII);
|
||||
ValueTrackerResult Res;
|
||||
bool ShouldRewrite = false;
|
||||
|
||||
do {
|
||||
// Follow the chain of copies until we reach the top of the use-def chain
|
||||
// or find a more suitable source.
|
||||
Res = ValTracker.getNextSource();
|
||||
// Follow the chain of copies until we find a more suitable source, a phi
|
||||
// or have to abort.
|
||||
while (true) {
|
||||
ValueTrackerResult Res = ValTracker.getNextSource();
|
||||
// Abort at the end of a chain (without finding a suitable source).
|
||||
if (!Res.isValid())
|
||||
break;
|
||||
return false;
|
||||
|
||||
// Insert the Def -> Use entry for the recently found source.
|
||||
ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
|
||||
@ -763,24 +762,19 @@ bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg,
|
||||
if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg))
|
||||
return false;
|
||||
|
||||
// Keep following the chain if the value isn't any better yet.
|
||||
const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
|
||||
ShouldRewrite = TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC,
|
||||
CurSrcPair.SubReg);
|
||||
} while (!ShouldRewrite);
|
||||
if (!TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC, CurSrcPair.SubReg))
|
||||
continue;
|
||||
|
||||
// Continue looking for new sources...
|
||||
if (Res.isValid())
|
||||
continue;
|
||||
// We currently cannot deal with subreg operands on PHI instructions
|
||||
// (see insertPHI()).
|
||||
if (PHICount > 0 && CurSrcPair.SubReg != 0)
|
||||
continue;
|
||||
|
||||
// Do not continue searching for a new source if there's at least
|
||||
// one use-def which cannot be rewritten.
|
||||
if (!ShouldRewrite)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (PHICount >= RewritePHILimit) {
|
||||
DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
|
||||
return false;
|
||||
// We found a suitable source, and are done with this chain.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If we did not find a more suitable source, there is nothing to optimize.
|
||||
@ -799,6 +793,9 @@ insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
|
||||
assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
|
||||
|
||||
const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg);
|
||||
// NewRC is only correct if no subregisters are involved. findNextSource()
|
||||
// should have rejected those cases already.
|
||||
assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
|
||||
unsigned NewVR = MRI->createVirtualRegister(NewRC);
|
||||
MachineBasicBlock *MBB = OrigPHI->getParent();
|
||||
MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(),
|
||||
|
@ -3842,9 +3842,16 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
|
||||
EVT ExtVT;
|
||||
if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
|
||||
isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
|
||||
// Only add this load if we can make it more narrow.
|
||||
if (ExtVT.bitsLT(Load->getMemoryVT()))
|
||||
|
||||
// ZEXTLOAD is already small enough.
|
||||
if (Load->getExtensionType() == ISD::ZEXTLOAD &&
|
||||
ExtVT.bitsGE(Load->getMemoryVT()))
|
||||
continue;
|
||||
|
||||
// Use LE to convert equal sized loads to zext.
|
||||
if (ExtVT.bitsLE(Load->getMemoryVT()))
|
||||
Loads.insert(Load);
|
||||
|
||||
continue;
|
||||
}
|
||||
return false;
|
||||
@ -3899,11 +3906,13 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
|
||||
if (Loads.size() == 0)
|
||||
return false;
|
||||
|
||||
DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
|
||||
SDValue MaskOp = N->getOperand(1);
|
||||
|
||||
// If it exists, fixup the single node we allow in the tree that needs
|
||||
// masking.
|
||||
if (FixupNode) {
|
||||
DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
|
||||
SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
|
||||
FixupNode->getValueType(0),
|
||||
SDValue(FixupNode, 0), MaskOp);
|
||||
@ -3914,14 +3923,21 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
|
||||
|
||||
// Narrow any constants that need it.
|
||||
for (auto *LogicN : NodesWithConsts) {
|
||||
auto *C = cast<ConstantSDNode>(LogicN->getOperand(1));
|
||||
SDValue And = DAG.getNode(ISD::AND, SDLoc(C), C->getValueType(0),
|
||||
SDValue(C, 0), MaskOp);
|
||||
DAG.UpdateNodeOperands(LogicN, LogicN->getOperand(0), And);
|
||||
SDValue Op0 = LogicN->getOperand(0);
|
||||
SDValue Op1 = LogicN->getOperand(1);
|
||||
|
||||
if (isa<ConstantSDNode>(Op0))
|
||||
std::swap(Op0, Op1);
|
||||
|
||||
SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
|
||||
Op1, MaskOp);
|
||||
|
||||
DAG.UpdateNodeOperands(LogicN, Op0, And);
|
||||
}
|
||||
|
||||
// Create narrow loads.
|
||||
for (auto *Load : Loads) {
|
||||
DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
|
||||
SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
|
||||
SDValue(Load, 0), MaskOp);
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
|
||||
@ -5209,7 +5225,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
|
||||
return SDValue();
|
||||
|
||||
// Loads must share the same base address
|
||||
BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
|
||||
BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
|
||||
int64_t ByteOffsetFromBase = 0;
|
||||
if (!Base)
|
||||
Base = Ptr;
|
||||
@ -12928,7 +12944,7 @@ void DAGCombiner::getStoreMergeCandidates(
|
||||
StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
|
||||
// This holds the base pointer, index, and the offset in bytes from the base
|
||||
// pointer.
|
||||
BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
|
||||
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
|
||||
EVT MemVT = St->getMemoryVT();
|
||||
|
||||
SDValue Val = peekThroughBitcast(St->getValue());
|
||||
@ -12949,7 +12965,7 @@ void DAGCombiner::getStoreMergeCandidates(
|
||||
EVT LoadVT;
|
||||
if (IsLoadSrc) {
|
||||
auto *Ld = cast<LoadSDNode>(Val);
|
||||
LBasePtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
|
||||
LBasePtr = BaseIndexOffset::match(Ld, DAG);
|
||||
LoadVT = Ld->getMemoryVT();
|
||||
// Load and store should be the same type.
|
||||
if (MemVT != LoadVT)
|
||||
@ -12968,7 +12984,7 @@ void DAGCombiner::getStoreMergeCandidates(
|
||||
return false;
|
||||
// The Load's Base Ptr must also match
|
||||
if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
|
||||
auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
|
||||
auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
|
||||
if (LoadVT != OtherLd->getMemoryVT())
|
||||
return false;
|
||||
if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
|
||||
@ -12992,7 +13008,7 @@ void DAGCombiner::getStoreMergeCandidates(
|
||||
Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
|
||||
return false;
|
||||
}
|
||||
Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
|
||||
Ptr = BaseIndexOffset::match(Other, DAG);
|
||||
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
|
||||
};
|
||||
|
||||
@ -13365,7 +13381,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
|
||||
if (Ld->getMemoryVT() != MemVT)
|
||||
break;
|
||||
|
||||
BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
|
||||
BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
|
||||
// If this is not the first ptr that we check.
|
||||
int64_t LdOffset = 0;
|
||||
if (LdBasePtr.getBase().getNode()) {
|
||||
@ -17432,44 +17448,46 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
|
||||
unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
|
||||
|
||||
// Check for BaseIndexOffset matching.
|
||||
BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
|
||||
BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
|
||||
BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
|
||||
BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
|
||||
int64_t PtrDiff;
|
||||
if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
|
||||
return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
|
||||
if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
|
||||
if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
|
||||
return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
|
||||
|
||||
// If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
|
||||
// able to calculate their relative offset if at least one arises
|
||||
// from an alloca. However, these allocas cannot overlap and we
|
||||
// can infer there is no alias.
|
||||
if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
|
||||
if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
|
||||
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
|
||||
// If the bases are the same frame index but we couldn't find a
|
||||
// constant offset, (indices are different) be conservative.
|
||||
if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
|
||||
!MFI.isFixedObjectIndex(B->getIndex())))
|
||||
return false;
|
||||
}
|
||||
// If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
|
||||
// able to calculate their relative offset if at least one arises
|
||||
// from an alloca. However, these allocas cannot overlap and we
|
||||
// can infer there is no alias.
|
||||
if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
|
||||
if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
|
||||
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
|
||||
// If the bases are the same frame index but we couldn't find a
|
||||
// constant offset, (indices are different) be conservative.
|
||||
if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
|
||||
!MFI.isFixedObjectIndex(B->getIndex())))
|
||||
return false;
|
||||
}
|
||||
|
||||
bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
|
||||
bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
|
||||
bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
|
||||
bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
|
||||
bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
|
||||
bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
|
||||
bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
|
||||
bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
|
||||
bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
|
||||
bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
|
||||
bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
|
||||
bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
|
||||
|
||||
// If of mismatched base types or checkable indices we can check
|
||||
// they do not alias.
|
||||
if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
|
||||
(IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
|
||||
(IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
|
||||
return false;
|
||||
// If of mismatched base types or checkable indices we can check
|
||||
// they do not alias.
|
||||
if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
|
||||
(IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
|
||||
(IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
|
||||
return false;
|
||||
}
|
||||
|
||||
// If we know required SrcValue1 and SrcValue2 have relatively large alignment
|
||||
// compared to the size and offset of the access, we may be able to prove they
|
||||
// do not alias. This check is conservative for now to catch cases created by
|
||||
// splitting vector types.
|
||||
// If we know required SrcValue1 and SrcValue2 have relatively large
|
||||
// alignment compared to the size and offset of the access, we may be able
|
||||
// to prove they do not alias. This check is conservative for now to catch
|
||||
// cases created by splitting vector types.
|
||||
int64_t SrcValOffset0 = Op0->getSrcValueOffset();
|
||||
int64_t SrcValOffset1 = Op1->getSrcValueOffset();
|
||||
unsigned OrigAlignment0 = Op0->getOriginalAlignment();
|
||||
@ -17479,8 +17497,8 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
|
||||
int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
|
||||
int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
|
||||
|
||||
// There is no overlap between these relatively aligned accesses of similar
|
||||
// size. Return no alias.
|
||||
// There is no overlap between these relatively aligned accesses of
|
||||
// similar size. Return no alias.
|
||||
if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
|
||||
(OffAlign1 + NumBytes1) <= OffAlign0)
|
||||
return false;
|
||||
@ -17643,7 +17661,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
|
||||
|
||||
// This holds the base pointer, index, and the offset in bytes from the base
|
||||
// pointer.
|
||||
BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
|
||||
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
|
||||
|
||||
// We must have a base and an offset.
|
||||
if (!BasePtr.getBase().getNode())
|
||||
@ -17669,7 +17687,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
|
||||
break;
|
||||
|
||||
// Find the base pointer and offset for this memory node.
|
||||
BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
|
||||
BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);
|
||||
|
||||
// Check that the base pointer is the same as the original one.
|
||||
if (!BasePtr.equalBaseIndex(Ptr, DAG))
|
||||
|
@ -2965,12 +2965,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
|
||||
case ISD::ZERO_EXTEND:
|
||||
LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res,
|
||||
DAG.getValueType(AtomicType));
|
||||
RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
|
||||
RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
|
||||
ExtRes = LHS;
|
||||
break;
|
||||
case ISD::ANY_EXTEND:
|
||||
LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType);
|
||||
RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
|
||||
RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("Invalid atomic op extension");
|
||||
|
@ -7947,11 +7947,8 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
|
||||
if (VT.getSizeInBits() / 8 != Bytes)
|
||||
return false;
|
||||
|
||||
SDValue Loc = LD->getOperand(1);
|
||||
SDValue BaseLoc = Base->getOperand(1);
|
||||
|
||||
auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this);
|
||||
auto LocDecomp = BaseIndexOffset::match(Loc, *this);
|
||||
auto BaseLocDecomp = BaseIndexOffset::match(Base, *this);
|
||||
auto LocDecomp = BaseIndexOffset::match(LD, *this);
|
||||
|
||||
int64_t Offset = 0;
|
||||
if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))
|
||||
|
@ -21,6 +21,9 @@ using namespace llvm;
|
||||
|
||||
bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
|
||||
const SelectionDAG &DAG, int64_t &Off) {
|
||||
// Conservatively fail if we a match failed..
|
||||
if (!Base.getNode() || !Other.Base.getNode())
|
||||
return false;
|
||||
// Initial Offset difference.
|
||||
Off = Other.Offset - Offset;
|
||||
|
||||
@ -72,13 +75,29 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
|
||||
}
|
||||
|
||||
/// Parses tree in Ptr for base, index, offset addresses.
|
||||
BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
|
||||
BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N,
|
||||
const SelectionDAG &DAG) {
|
||||
SDValue Ptr = N->getBasePtr();
|
||||
|
||||
// (((B + I*M) + c)) + c ...
|
||||
SDValue Base = DAG.getTargetLoweringInfo().unwrapAddress(Ptr);
|
||||
SDValue Index = SDValue();
|
||||
int64_t Offset = 0;
|
||||
bool IsIndexSignExt = false;
|
||||
|
||||
// pre-inc/pre-dec ops are components of EA.
|
||||
if (N->getAddressingMode() == ISD::PRE_INC) {
|
||||
if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
|
||||
Offset += C->getSExtValue();
|
||||
else // If unknown, give up now.
|
||||
return BaseIndexOffset(SDValue(), SDValue(), 0, false);
|
||||
} else if (N->getAddressingMode() == ISD::PRE_DEC) {
|
||||
if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
|
||||
Offset -= C->getSExtValue();
|
||||
else // If unknown, give up now.
|
||||
return BaseIndexOffset(SDValue(), SDValue(), 0, false);
|
||||
}
|
||||
|
||||
// Consume constant adds & ors with appropriate masking.
|
||||
while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) {
|
||||
if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
|
||||
|
@ -132,9 +132,18 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
|
||||
setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
|
||||
setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
|
||||
|
||||
// Darwin 10 and higher has an optimized __bzero.
|
||||
if (!TT.isMacOSX() || !TT.isMacOSXVersionLT(10, 6) || TT.isArch64Bit()) {
|
||||
setLibcallName(RTLIB::BZERO, TT.isAArch64() ? "bzero" : "__bzero");
|
||||
// Some darwins have an optimized __bzero/bzero function.
|
||||
switch (TT.getArch()) {
|
||||
case Triple::x86:
|
||||
case Triple::x86_64:
|
||||
if (TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 6))
|
||||
setLibcallName(RTLIB::BZERO, "__bzero");
|
||||
break;
|
||||
case Triple::aarch64:
|
||||
setLibcallName(RTLIB::BZERO, "bzero");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (darwinHasSinCos(TT)) {
|
||||
|
@ -954,7 +954,12 @@ Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV,
|
||||
NewGV->setLinkage(GlobalValue::InternalLinkage);
|
||||
|
||||
Constant *C = NewGV;
|
||||
if (DGV)
|
||||
// Only create a bitcast if necessary. In particular, with
|
||||
// DebugTypeODRUniquing we may reach metadata in the destination module
|
||||
// containing a GV from the source module, in which case SGV will be
|
||||
// the same as DGV and NewGV, and TypeMap.get() will assert since it
|
||||
// assumes it is being invoked on a type in the source module.
|
||||
if (DGV && NewGV != SGV)
|
||||
C = ConstantExpr::getBitCast(NewGV, TypeMap.get(SGV->getType()));
|
||||
|
||||
if (DGV && NewGV != DGV) {
|
||||
|
@ -76,6 +76,14 @@ bool CodeViewContext::addFile(MCStreamer &OS, unsigned FileNumber,
|
||||
return true;
|
||||
}
|
||||
|
||||
MCCVFunctionInfo *CodeViewContext::getCVFunctionInfo(unsigned FuncId) {
|
||||
if (FuncId >= Functions.size())
|
||||
return nullptr;
|
||||
if (Functions[FuncId].isUnallocatedFunctionInfo())
|
||||
return nullptr;
|
||||
return &Functions[FuncId];
|
||||
}
|
||||
|
||||
bool CodeViewContext::recordFunctionId(unsigned FuncId) {
|
||||
if (FuncId >= Functions.size())
|
||||
Functions.resize(FuncId + 1);
|
||||
@ -247,6 +255,67 @@ void CodeViewContext::emitFileChecksumOffset(MCObjectStreamer &OS,
|
||||
OS.EmitValueImpl(SRE, 4);
|
||||
}
|
||||
|
||||
void CodeViewContext::addLineEntry(const MCCVLineEntry &LineEntry) {
|
||||
size_t Offset = MCCVLines.size();
|
||||
auto I = MCCVLineStartStop.insert(
|
||||
{LineEntry.getFunctionId(), {Offset, Offset + 1}});
|
||||
if (!I.second)
|
||||
I.first->second.second = Offset + 1;
|
||||
MCCVLines.push_back(LineEntry);
|
||||
}
|
||||
|
||||
std::vector<MCCVLineEntry>
|
||||
CodeViewContext::getFunctionLineEntries(unsigned FuncId) {
|
||||
std::vector<MCCVLineEntry> FilteredLines;
|
||||
auto I = MCCVLineStartStop.find(FuncId);
|
||||
if (I != MCCVLineStartStop.end()) {
|
||||
MCCVFunctionInfo *SiteInfo = getCVFunctionInfo(FuncId);
|
||||
for (size_t Idx = I->second.first, End = I->second.second; Idx != End;
|
||||
++Idx) {
|
||||
unsigned LocationFuncId = MCCVLines[Idx].getFunctionId();
|
||||
if (LocationFuncId == FuncId) {
|
||||
// This was a .cv_loc directly for FuncId, so record it.
|
||||
FilteredLines.push_back(MCCVLines[Idx]);
|
||||
} else {
|
||||
// Check if the current location is inlined in this function. If it is,
|
||||
// synthesize a statement .cv_loc at the original inlined call site.
|
||||
auto I = SiteInfo->InlinedAtMap.find(LocationFuncId);
|
||||
if (I != SiteInfo->InlinedAtMap.end()) {
|
||||
MCCVFunctionInfo::LineInfo &IA = I->second;
|
||||
// Only add the location if it differs from the previous location.
|
||||
// Large inlined calls will have many .cv_loc entries and we only need
|
||||
// one line table entry in the parent function.
|
||||
if (FilteredLines.empty() ||
|
||||
FilteredLines.back().getFileNum() != IA.File ||
|
||||
FilteredLines.back().getLine() != IA.Line ||
|
||||
FilteredLines.back().getColumn() != IA.Col) {
|
||||
FilteredLines.push_back(MCCVLineEntry(
|
||||
MCCVLines[Idx].getLabel(),
|
||||
MCCVLoc(FuncId, IA.File, IA.Line, IA.Col, false, false)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return FilteredLines;
|
||||
}
|
||||
|
||||
std::pair<size_t, size_t> CodeViewContext::getLineExtent(unsigned FuncId) {
|
||||
auto I = MCCVLineStartStop.find(FuncId);
|
||||
// Return an empty extent if there are no cv_locs for this function id.
|
||||
if (I == MCCVLineStartStop.end())
|
||||
return {~0ULL, 0};
|
||||
return I->second;
|
||||
}
|
||||
|
||||
ArrayRef<MCCVLineEntry> CodeViewContext::getLinesForExtent(size_t L, size_t R) {
|
||||
if (R <= L)
|
||||
return None;
|
||||
if (L >= MCCVLines.size())
|
||||
return None;
|
||||
return makeArrayRef(&MCCVLines[L], R - L);
|
||||
}
|
||||
|
||||
void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS,
|
||||
unsigned FuncId,
|
||||
const MCSymbol *FuncBegin,
|
||||
|
@ -868,6 +868,40 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
|
||||
if (OpFlags & AArch64II::MO_GOT) {
|
||||
I.setDesc(TII.get(AArch64::LOADgot));
|
||||
I.getOperand(1).setTargetFlags(OpFlags);
|
||||
} else if (TM.getCodeModel() == CodeModel::Large) {
|
||||
// Materialize the global using movz/movk instructions.
|
||||
unsigned MovZDstReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
|
||||
auto InsertPt = std::next(I.getIterator());
|
||||
auto MovZ =
|
||||
BuildMI(MBB, InsertPt, I.getDebugLoc(), TII.get(AArch64::MOVZXi))
|
||||
.addDef(MovZDstReg);
|
||||
MovZ->addOperand(MF, I.getOperand(1));
|
||||
MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
|
||||
AArch64II::MO_NC);
|
||||
MovZ->addOperand(MF, MachineOperand::CreateImm(0));
|
||||
constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
|
||||
|
||||
auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags,
|
||||
unsigned Offset, unsigned ForceDstReg) {
|
||||
unsigned DstReg =
|
||||
ForceDstReg ? ForceDstReg
|
||||
: MRI.createVirtualRegister(&AArch64::GPR64RegClass);
|
||||
auto MovI = BuildMI(MBB, InsertPt, MovZ->getDebugLoc(),
|
||||
TII.get(AArch64::MOVKXi))
|
||||
.addDef(DstReg)
|
||||
.addReg(SrcReg);
|
||||
MovI->addOperand(MF, MachineOperand::CreateGA(
|
||||
GV, MovZ->getOperand(1).getOffset(), Flags));
|
||||
MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
|
||||
constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
|
||||
return DstReg;
|
||||
};
|
||||
unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(),
|
||||
AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
|
||||
DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
|
||||
BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
|
||||
I.eraseFromParent();
|
||||
return true;
|
||||
} else {
|
||||
I.setDesc(TII.get(AArch64::MOVaddr));
|
||||
I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
|
||||
|
@ -821,7 +821,6 @@ namespace llvm {
|
||||
MutableArrayRef<int> NewMask, unsigned Options = None);
|
||||
OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
|
||||
MutableArrayRef<int> NewMask);
|
||||
OpRef zerous(ShuffleMask SM, OpRef Va, ResultStack &Results);
|
||||
OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
|
||||
ResultStack &Results);
|
||||
OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
|
||||
@ -1139,25 +1138,6 @@ OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
|
||||
return concat(Out[0], Out[1], Results);
|
||||
}
|
||||
|
||||
OpRef HvxSelector::zerous(ShuffleMask SM, OpRef Va, ResultStack &Results) {
|
||||
DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
|
||||
|
||||
int VecLen = SM.Mask.size();
|
||||
SmallVector<uint8_t,128> UsedBytes(VecLen);
|
||||
bool HasUnused = false;
|
||||
for (int I = 0; I != VecLen; ++I) {
|
||||
if (SM.Mask[I] != -1)
|
||||
UsedBytes[I] = 0xFF;
|
||||
else
|
||||
HasUnused = true;
|
||||
}
|
||||
if (!HasUnused)
|
||||
return Va;
|
||||
SDValue B = getVectorConstant(UsedBytes, SDLoc(Results.InpNode));
|
||||
Results.push(Hexagon::V6_vand, getSingleVT(MVT::i8), {Va, OpRef(B)});
|
||||
return OpRef::res(Results.top());
|
||||
}
|
||||
|
||||
OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
|
||||
ResultStack &Results) {
|
||||
DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
|
||||
|
@ -142,6 +142,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
||||
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
|
||||
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
|
||||
|
||||
// Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
|
||||
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
|
||||
|
||||
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
|
||||
for (MVT VT : MVT::integer_valuetypes()) {
|
||||
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
|
||||
@ -1154,6 +1157,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case PPCISD::Hi: return "PPCISD::Hi";
|
||||
case PPCISD::Lo: return "PPCISD::Lo";
|
||||
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
|
||||
case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
|
||||
case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
|
||||
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
|
||||
case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
|
||||
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
|
||||
@ -8834,6 +8839,42 @@ SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
|
||||
return Op;
|
||||
}
|
||||
|
||||
// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
|
||||
// compared to a value that is atomically loaded (atomic loads zero-extend).
|
||||
SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
|
||||
"Expecting an atomic compare-and-swap here.");
|
||||
SDLoc dl(Op);
|
||||
auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
|
||||
EVT MemVT = AtomicNode->getMemoryVT();
|
||||
if (MemVT.getSizeInBits() >= 32)
|
||||
return Op;
|
||||
|
||||
SDValue CmpOp = Op.getOperand(2);
|
||||
// If this is already correctly zero-extended, leave it alone.
|
||||
auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
|
||||
if (DAG.MaskedValueIsZero(CmpOp, HighBits))
|
||||
return Op;
|
||||
|
||||
// Clear the high bits of the compare operand.
|
||||
unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
|
||||
SDValue NewCmpOp =
|
||||
DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
|
||||
DAG.getConstant(MaskVal, dl, MVT::i32));
|
||||
|
||||
// Replace the existing compare operand with the properly zero-extended one.
|
||||
SmallVector<SDValue, 4> Ops;
|
||||
for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
|
||||
Ops.push_back(AtomicNode->getOperand(i));
|
||||
Ops[2] = NewCmpOp;
|
||||
MachineMemOperand *MMO = AtomicNode->getMemOperand();
|
||||
SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
|
||||
auto NodeTy =
|
||||
(MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
|
||||
return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
|
||||
}
|
||||
|
||||
SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
SDLoc dl(Op);
|
||||
@ -9325,6 +9366,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
return LowerREM(Op, DAG);
|
||||
case ISD::BSWAP:
|
||||
return LowerBSWAP(Op, DAG);
|
||||
case ISD::ATOMIC_CMP_SWAP:
|
||||
return LowerATOMIC_CMP_SWAP(Op, DAG);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -430,6 +430,11 @@ namespace llvm {
|
||||
/// The 4xf32 load used for v4i1 constants.
|
||||
QVLFSb,
|
||||
|
||||
/// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
|
||||
/// except they ensure that the compare input is zero-extended for
|
||||
/// sub-word versions because the atomic loads zero-extend.
|
||||
ATOMIC_CMP_SWAP_8, ATOMIC_CMP_SWAP_16,
|
||||
|
||||
/// GPRC = TOC_ENTRY GA, TOC
|
||||
/// Loads the entry for GA from the TOC, where the TOC base is given by
|
||||
/// the last operand.
|
||||
@ -955,6 +960,7 @@ namespace llvm {
|
||||
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
@ -257,6 +257,13 @@ def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
|
||||
def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
|
||||
[SDNPHasChain, SDNPOptInGlue]>;
|
||||
|
||||
// PPC-specific atomic operations.
|
||||
def PPCatomicCmpSwap_8 :
|
||||
SDNode<"PPCISD::ATOMIC_CMP_SWAP_8", SDTAtomic3,
|
||||
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
|
||||
def PPCatomicCmpSwap_16 :
|
||||
SDNode<"PPCISD::ATOMIC_CMP_SWAP_16", SDTAtomic3,
|
||||
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
|
||||
def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
|
||||
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
|
||||
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
|
||||
@ -1710,6 +1717,11 @@ let usesCustomInserter = 1 in {
|
||||
}
|
||||
}
|
||||
|
||||
def : Pat<(PPCatomicCmpSwap_8 xoaddr:$ptr, i32:$old, i32:$new),
|
||||
(ATOMIC_CMP_SWAP_I8 xoaddr:$ptr, i32:$old, i32:$new)>;
|
||||
def : Pat<(PPCatomicCmpSwap_16 xoaddr:$ptr, i32:$old, i32:$new),
|
||||
(ATOMIC_CMP_SWAP_I16 xoaddr:$ptr, i32:$old, i32:$new)>;
|
||||
|
||||
// Instructions to support atomic operations
|
||||
let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
|
||||
def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
|
||||
|
@ -2375,6 +2375,13 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
|
||||
.Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
|
||||
.Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
|
||||
Flags |= Prefix;
|
||||
if (getLexer().is(AsmToken::EndOfStatement)) {
|
||||
// We don't have real instr with the given prefix
|
||||
// let's use the prefix as the instr.
|
||||
// TODO: there could be several prefixes one after another
|
||||
Flags = X86::IP_NO_PREFIX;
|
||||
break;
|
||||
}
|
||||
Name = Parser.getTok().getString();
|
||||
Parser.Lex(); // eat the prefix
|
||||
// Hack: we could have something like "rep # some comment" or
|
||||
|
@ -7893,8 +7893,14 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG,
|
||||
IndicesVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
|
||||
VT.getVectorNumElements());
|
||||
IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT);
|
||||
return DAG.getNode(VT == MVT::v16i8 ? X86ISD::PSHUFB : X86ISD::VPERMV,
|
||||
SDLoc(V), VT, IndicesVec, SrcVec);
|
||||
if (SrcVec.getValueSizeInBits() < IndicesVT.getSizeInBits()) {
|
||||
SrcVec =
|
||||
DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(SrcVec), VT, DAG.getUNDEF(VT),
|
||||
SrcVec, DAG.getIntPtrConstant(0, SDLoc(SrcVec)));
|
||||
}
|
||||
if (VT == MVT::v16i8)
|
||||
return DAG.getNode(X86ISD::PSHUFB, SDLoc(V), VT, SrcVec, IndicesVec);
|
||||
return DAG.getNode(X86ISD::VPERMV, SDLoc(V), VT, IndicesVec, SrcVec);
|
||||
}
|
||||
|
||||
SDValue
|
||||
@ -18262,6 +18268,18 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||
return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
|
||||
}
|
||||
|
||||
// For v64i1 without 64-bit support we need to split and rejoin.
|
||||
if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
|
||||
assert(Subtarget.hasBWI() && "Expected BWI to be legal");
|
||||
SDValue Op1Lo = extractSubVector(Op1, 0, DAG, DL, 32);
|
||||
SDValue Op2Lo = extractSubVector(Op2, 0, DAG, DL, 32);
|
||||
SDValue Op1Hi = extractSubVector(Op1, 32, DAG, DL, 32);
|
||||
SDValue Op2Hi = extractSubVector(Op2, 32, DAG, DL, 32);
|
||||
SDValue Lo = DAG.getSelect(DL, MVT::v32i1, Cond, Op1Lo, Op2Lo);
|
||||
SDValue Hi = DAG.getSelect(DL, MVT::v32i1, Cond, Op1Hi, Op2Hi);
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
|
||||
}
|
||||
|
||||
if (VT.isVector() && VT.getVectorElementType() == MVT::i1) {
|
||||
SDValue Op1Scalar;
|
||||
if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode()))
|
||||
@ -28652,13 +28670,14 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
||||
}
|
||||
}
|
||||
|
||||
SDValue NewV1 = V1; // Save operand in case early exit happens.
|
||||
if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
|
||||
V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
|
||||
ShuffleVT) &&
|
||||
NewV1, DL, DAG, Subtarget, Shuffle,
|
||||
ShuffleSrcVT, ShuffleVT) &&
|
||||
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
|
||||
if (Depth == 1 && Root.getOpcode() == Shuffle)
|
||||
return SDValue(); // Nothing to do!
|
||||
Res = DAG.getBitcast(ShuffleSrcVT, V1);
|
||||
Res = DAG.getBitcast(ShuffleSrcVT, NewV1);
|
||||
DCI.AddToWorklist(Res.getNode());
|
||||
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
|
||||
DCI.AddToWorklist(Res.getNode());
|
||||
@ -28680,33 +28699,36 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
||||
}
|
||||
}
|
||||
|
||||
SDValue NewV1 = V1; // Save operands in case early exit happens.
|
||||
SDValue NewV2 = V2;
|
||||
if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
|
||||
V1, V2, DL, DAG, Subtarget, Shuffle,
|
||||
NewV1, NewV2, DL, DAG, Subtarget, Shuffle,
|
||||
ShuffleSrcVT, ShuffleVT, UnaryShuffle) &&
|
||||
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
|
||||
if (Depth == 1 && Root.getOpcode() == Shuffle)
|
||||
return SDValue(); // Nothing to do!
|
||||
V1 = DAG.getBitcast(ShuffleSrcVT, V1);
|
||||
DCI.AddToWorklist(V1.getNode());
|
||||
V2 = DAG.getBitcast(ShuffleSrcVT, V2);
|
||||
DCI.AddToWorklist(V2.getNode());
|
||||
Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2);
|
||||
NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1);
|
||||
DCI.AddToWorklist(NewV1.getNode());
|
||||
NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2);
|
||||
DCI.AddToWorklist(NewV2.getNode());
|
||||
Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2);
|
||||
DCI.AddToWorklist(Res.getNode());
|
||||
return DAG.getBitcast(RootVT, Res);
|
||||
}
|
||||
|
||||
if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
|
||||
AllowIntDomain, V1, V2, DL, DAG,
|
||||
Subtarget, Shuffle, ShuffleVT,
|
||||
PermuteImm) &&
|
||||
NewV1 = V1; // Save operands in case early exit happens.
|
||||
NewV2 = V2;
|
||||
if (matchBinaryPermuteVectorShuffle(
|
||||
MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1,
|
||||
NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
|
||||
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
|
||||
if (Depth == 1 && Root.getOpcode() == Shuffle)
|
||||
return SDValue(); // Nothing to do!
|
||||
V1 = DAG.getBitcast(ShuffleVT, V1);
|
||||
DCI.AddToWorklist(V1.getNode());
|
||||
V2 = DAG.getBitcast(ShuffleVT, V2);
|
||||
DCI.AddToWorklist(V2.getNode());
|
||||
Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2,
|
||||
NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
|
||||
DCI.AddToWorklist(NewV1.getNode());
|
||||
NewV2 = DAG.getBitcast(ShuffleVT, NewV2);
|
||||
DCI.AddToWorklist(NewV2.getNode());
|
||||
Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2,
|
||||
DAG.getConstant(PermuteImm, DL, MVT::i8));
|
||||
DCI.AddToWorklist(Res.getNode());
|
||||
return DAG.getBitcast(RootVT, Res);
|
||||
|
@ -754,7 +754,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
|
||||
// type remains the same.
|
||||
if (Kind == TTI::SK_PermuteSingleSrc && LT.first != 1) {
|
||||
MVT LegalVT = LT.second;
|
||||
if (LegalVT.getVectorElementType().getSizeInBits() ==
|
||||
if (LegalVT.isVector() &&
|
||||
LegalVT.getVectorElementType().getSizeInBits() ==
|
||||
Tp->getVectorElementType()->getPrimitiveSizeInBits() &&
|
||||
LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) {
|
||||
|
||||
|
@ -648,7 +648,7 @@ private:
|
||||
// track in a CHI. In the PDom walk, there can be values in the
|
||||
// stack which are not control dependent e.g., nested loop.
|
||||
if (si != RenameStack.end() && si->second.size() &&
|
||||
DT->dominates(Pred, si->second.back()->getParent())) {
|
||||
DT->properlyDominates(Pred, si->second.back()->getParent())) {
|
||||
C.Dest = BB; // Assign the edge
|
||||
C.I = si->second.pop_back_val(); // Assign the argument
|
||||
DEBUG(dbgs() << "\nCHI Inserted in BB: " << C.Dest->getName()
|
||||
|
@ -14,7 +14,6 @@
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/Analysis/DivergenceAnalysis.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/RegionInfo.h"
|
||||
#include "llvm/Analysis/RegionIterator.h"
|
||||
#include "llvm/Analysis/RegionPass.h"
|
||||
@ -177,9 +176,8 @@ class StructurizeCFG : public RegionPass {
|
||||
Region *ParentRegion;
|
||||
|
||||
DominatorTree *DT;
|
||||
LoopInfo *LI;
|
||||
|
||||
SmallVector<RegionNode *, 8> Order;
|
||||
std::deque<RegionNode *> Order;
|
||||
BBSet Visited;
|
||||
|
||||
BBPhiMap DeletedPhis;
|
||||
@ -204,7 +202,7 @@ class StructurizeCFG : public RegionPass {
|
||||
|
||||
void gatherPredicates(RegionNode *N);
|
||||
|
||||
void collectInfos();
|
||||
void analyzeNode(RegionNode *N);
|
||||
|
||||
void insertConditions(bool Loops);
|
||||
|
||||
@ -258,7 +256,6 @@ public:
|
||||
AU.addRequired<DivergenceAnalysis>();
|
||||
AU.addRequiredID(LowerSwitchID);
|
||||
AU.addRequired<DominatorTreeWrapperPass>();
|
||||
AU.addRequired<LoopInfoWrapperPass>();
|
||||
|
||||
AU.addPreserved<DominatorTreeWrapperPass>();
|
||||
RegionPass::getAnalysisUsage(AU);
|
||||
@ -292,55 +289,17 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
|
||||
|
||||
/// \brief Build up the general order of nodes
|
||||
void StructurizeCFG::orderNodes() {
|
||||
ReversePostOrderTraversal<Region*> RPOT(ParentRegion);
|
||||
SmallDenseMap<Loop*, unsigned, 8> LoopBlocks;
|
||||
assert(Visited.empty());
|
||||
assert(Predicates.empty());
|
||||
assert(Loops.empty());
|
||||
assert(LoopPreds.empty());
|
||||
|
||||
// The reverse post-order traversal of the list gives us an ordering close
|
||||
// to what we want. The only problem with it is that sometimes backedges
|
||||
// for outer loops will be visited before backedges for inner loops.
|
||||
for (RegionNode *RN : RPOT) {
|
||||
BasicBlock *BB = RN->getEntry();
|
||||
Loop *Loop = LI->getLoopFor(BB);
|
||||
++LoopBlocks[Loop];
|
||||
// This must be RPO order for the back edge detection to work
|
||||
for (RegionNode *RN : ReversePostOrderTraversal<Region*>(ParentRegion)) {
|
||||
// FIXME: Is there a better order to use for structurization?
|
||||
Order.push_back(RN);
|
||||
analyzeNode(RN);
|
||||
}
|
||||
|
||||
unsigned CurrentLoopDepth = 0;
|
||||
Loop *CurrentLoop = nullptr;
|
||||
for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
|
||||
BasicBlock *BB = (*I)->getEntry();
|
||||
unsigned LoopDepth = LI->getLoopDepth(BB);
|
||||
|
||||
if (is_contained(Order, *I))
|
||||
continue;
|
||||
|
||||
if (LoopDepth < CurrentLoopDepth) {
|
||||
// Make sure we have visited all blocks in this loop before moving back to
|
||||
// the outer loop.
|
||||
|
||||
auto LoopI = I;
|
||||
while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) {
|
||||
LoopI++;
|
||||
BasicBlock *LoopBB = (*LoopI)->getEntry();
|
||||
if (LI->getLoopFor(LoopBB) == CurrentLoop) {
|
||||
--BlockCount;
|
||||
Order.push_back(*LoopI);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CurrentLoop = LI->getLoopFor(BB);
|
||||
if (CurrentLoop)
|
||||
LoopBlocks[CurrentLoop]--;
|
||||
|
||||
CurrentLoopDepth = LoopDepth;
|
||||
Order.push_back(*I);
|
||||
}
|
||||
|
||||
// This pass originally used a post-order traversal and then operated on
|
||||
// the list in reverse. Now that we are using a reverse post-order traversal
|
||||
// rather than re-working the whole pass to operate on the list in order,
|
||||
// we just reverse the list and continue to operate on it in reverse.
|
||||
std::reverse(Order.begin(), Order.end());
|
||||
}
|
||||
|
||||
/// \brief Determine the end of the loops
|
||||
@ -466,32 +425,19 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
|
||||
}
|
||||
|
||||
/// \brief Collect various loop and predicate infos
|
||||
void StructurizeCFG::collectInfos() {
|
||||
// Reset predicate
|
||||
Predicates.clear();
|
||||
void StructurizeCFG::analyzeNode(RegionNode *RN) {
|
||||
DEBUG(dbgs() << "Visiting: "
|
||||
<< (RN->isSubRegion() ? "SubRegion with entry: " : "")
|
||||
<< RN->getEntry()->getName() << '\n');
|
||||
|
||||
// and loop infos
|
||||
Loops.clear();
|
||||
LoopPreds.clear();
|
||||
// Analyze all the conditions leading to a node
|
||||
gatherPredicates(RN);
|
||||
|
||||
// Reset the visited nodes
|
||||
Visited.clear();
|
||||
// Remember that we've seen this node
|
||||
Visited.insert(RN->getEntry());
|
||||
|
||||
for (RegionNode *RN : reverse(Order)) {
|
||||
DEBUG(dbgs() << "Visiting: "
|
||||
<< (RN->isSubRegion() ? "SubRegion with entry: " : "")
|
||||
<< RN->getEntry()->getName() << " Loop Depth: "
|
||||
<< LI->getLoopDepth(RN->getEntry()) << "\n");
|
||||
|
||||
// Analyze all the conditions leading to a node
|
||||
gatherPredicates(RN);
|
||||
|
||||
// Remember that we've seen this node
|
||||
Visited.insert(RN->getEntry());
|
||||
|
||||
// Find the last back edges
|
||||
analyzeLoops(RN);
|
||||
}
|
||||
// Find the last back edges
|
||||
analyzeLoops(RN);
|
||||
}
|
||||
|
||||
/// \brief Insert the missing branch conditions
|
||||
@ -664,7 +610,7 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
|
||||
BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
|
||||
LLVMContext &Context = Func->getContext();
|
||||
BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
|
||||
Order.back()->getEntry();
|
||||
Order.front()->getEntry();
|
||||
BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
|
||||
Func, Insert);
|
||||
DT->addNewBlock(Flow, Dominator);
|
||||
@ -744,7 +690,8 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
|
||||
/// Take one node from the order vector and wire it up
|
||||
void StructurizeCFG::wireFlow(bool ExitUseAllowed,
|
||||
BasicBlock *LoopEnd) {
|
||||
RegionNode *Node = Order.pop_back_val();
|
||||
RegionNode *Node = Order.front();
|
||||
Order.pop_front();
|
||||
Visited.insert(Node->getEntry());
|
||||
|
||||
if (isPredictableTrue(Node)) {
|
||||
@ -768,7 +715,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed,
|
||||
|
||||
PrevNode = Node;
|
||||
while (!Order.empty() && !Visited.count(LoopEnd) &&
|
||||
dominatesPredicates(Entry, Order.back())) {
|
||||
dominatesPredicates(Entry, Order.front())) {
|
||||
handleLoops(false, LoopEnd);
|
||||
}
|
||||
|
||||
@ -779,7 +726,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed,
|
||||
|
||||
void StructurizeCFG::handleLoops(bool ExitUseAllowed,
|
||||
BasicBlock *LoopEnd) {
|
||||
RegionNode *Node = Order.back();
|
||||
RegionNode *Node = Order.front();
|
||||
BasicBlock *LoopStart = Node->getEntry();
|
||||
|
||||
if (!Loops.count(LoopStart)) {
|
||||
@ -924,10 +871,9 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
|
||||
ParentRegion = R;
|
||||
|
||||
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
||||
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
|
||||
|
||||
orderNodes();
|
||||
collectInfos();
|
||||
|
||||
createFlow();
|
||||
insertConditions(false);
|
||||
insertConditions(true);
|
||||
|
@ -2630,9 +2630,12 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
|
||||
Instruction *LastInduction = VecInd;
|
||||
for (unsigned Part = 0; Part < UF; ++Part) {
|
||||
VectorLoopValueMap.setVectorValue(EntryVal, Part, LastInduction);
|
||||
recordVectorLoopValueForInductionCast(II, LastInduction, Part);
|
||||
|
||||
if (isa<TruncInst>(EntryVal))
|
||||
addMetadata(LastInduction, EntryVal);
|
||||
else
|
||||
recordVectorLoopValueForInductionCast(II, LastInduction, Part);
|
||||
|
||||
LastInduction = cast<Instruction>(addFastMathFlag(
|
||||
Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")));
|
||||
}
|
||||
@ -2754,15 +2757,17 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
|
||||
|
||||
// If we haven't yet vectorized the induction variable, splat the scalar
|
||||
// induction variable, and build the necessary step vectors.
|
||||
// TODO: Don't do it unless the vectorized IV is really required.
|
||||
if (!VectorizedIV) {
|
||||
Value *Broadcasted = getBroadcastInstrs(ScalarIV);
|
||||
for (unsigned Part = 0; Part < UF; ++Part) {
|
||||
Value *EntryPart =
|
||||
getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode());
|
||||
VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart);
|
||||
recordVectorLoopValueForInductionCast(ID, EntryPart, Part);
|
||||
if (Trunc)
|
||||
addMetadata(EntryPart, Trunc);
|
||||
else
|
||||
recordVectorLoopValueForInductionCast(ID, EntryPart, Part);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1347,7 +1347,6 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
|
||||
DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane " <<
|
||||
Lane << " from " << *Scalar << ".\n");
|
||||
ExternalUses.emplace_back(Scalar, nullptr, Lane);
|
||||
continue;
|
||||
}
|
||||
for (User *U : Scalar->users()) {
|
||||
DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
|
||||
@ -4417,13 +4416,11 @@ bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
|
||||
if (!A || !B)
|
||||
return false;
|
||||
Value *VL[] = { A, B };
|
||||
return tryToVectorizeList(VL, R, None, true);
|
||||
return tryToVectorizeList(VL, R, true);
|
||||
}
|
||||
|
||||
bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
|
||||
ArrayRef<Value *> BuildVector,
|
||||
bool AllowReorder,
|
||||
bool NeedExtraction) {
|
||||
bool AllowReorder) {
|
||||
if (VL.size() < 2)
|
||||
return false;
|
||||
|
||||
@ -4517,12 +4514,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
|
||||
<< "\n");
|
||||
ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
|
||||
|
||||
ArrayRef<Value *> EmptyArray;
|
||||
ArrayRef<Value *> BuildVectorSlice;
|
||||
if (!BuildVector.empty())
|
||||
BuildVectorSlice = BuildVector.slice(I, OpsWidth);
|
||||
|
||||
R.buildTree(Ops, NeedExtraction ? EmptyArray : BuildVectorSlice);
|
||||
R.buildTree(Ops);
|
||||
// TODO: check if we can allow reordering for more cases.
|
||||
if (AllowReorder && R.shouldReorder()) {
|
||||
// Conceptually, there is nothing actually preventing us from trying to
|
||||
@ -4530,7 +4522,6 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
|
||||
// reductions. However, at this point, we only expect to get here when
|
||||
// there are exactly two operations.
|
||||
assert(Ops.size() == 2);
|
||||
assert(BuildVectorSlice.empty());
|
||||
Value *ReorderedOps[] = {Ops[1], Ops[0]};
|
||||
R.buildTree(ReorderedOps, None);
|
||||
}
|
||||
@ -4550,31 +4541,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
|
||||
<< " and with tree size "
|
||||
<< ore::NV("TreeSize", R.getTreeSize()));
|
||||
|
||||
Value *VectorizedRoot = R.vectorizeTree();
|
||||
|
||||
// Reconstruct the build vector by extracting the vectorized root. This
|
||||
// way we handle the case where some elements of the vector are
|
||||
// undefined.
|
||||
// (return (insertelt <4 x i32> (insertelt undef (opd0) 0) (opd1) 2))
|
||||
if (!BuildVectorSlice.empty()) {
|
||||
// The insert point is the last build vector instruction. The
|
||||
// vectorized root will precede it. This guarantees that we get an
|
||||
// instruction. The vectorized tree could have been constant folded.
|
||||
Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
|
||||
unsigned VecIdx = 0;
|
||||
for (auto &V : BuildVectorSlice) {
|
||||
IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
|
||||
++BasicBlock::iterator(InsertAfter));
|
||||
Instruction *I = cast<Instruction>(V);
|
||||
assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
|
||||
Instruction *Extract =
|
||||
cast<Instruction>(Builder.CreateExtractElement(
|
||||
VectorizedRoot, Builder.getInt32(VecIdx++)));
|
||||
I->setOperand(1, Extract);
|
||||
I->moveAfter(Extract);
|
||||
InsertAfter = I;
|
||||
}
|
||||
}
|
||||
R.vectorizeTree();
|
||||
// Move to the next bundle.
|
||||
I += VF - 1;
|
||||
NextInst = I + 1;
|
||||
@ -5495,11 +5462,9 @@ private:
|
||||
///
|
||||
/// Returns true if it matches
|
||||
static bool findBuildVector(InsertElementInst *LastInsertElem,
|
||||
SmallVectorImpl<Value *> &BuildVector,
|
||||
SmallVectorImpl<Value *> &BuildVectorOpds) {
|
||||
Value *V = nullptr;
|
||||
do {
|
||||
BuildVector.push_back(LastInsertElem);
|
||||
BuildVectorOpds.push_back(LastInsertElem->getOperand(1));
|
||||
V = LastInsertElem->getOperand(0);
|
||||
if (isa<UndefValue>(V))
|
||||
@ -5508,7 +5473,6 @@ static bool findBuildVector(InsertElementInst *LastInsertElem,
|
||||
if (!LastInsertElem || !LastInsertElem->hasOneUse())
|
||||
return false;
|
||||
} while (true);
|
||||
std::reverse(BuildVector.begin(), BuildVector.end());
|
||||
std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
|
||||
return true;
|
||||
}
|
||||
@ -5517,11 +5481,9 @@ static bool findBuildVector(InsertElementInst *LastInsertElem,
|
||||
///
|
||||
/// \return true if it matches.
|
||||
static bool findBuildAggregate(InsertValueInst *IV,
|
||||
SmallVectorImpl<Value *> &BuildVector,
|
||||
SmallVectorImpl<Value *> &BuildVectorOpds) {
|
||||
Value *V;
|
||||
do {
|
||||
BuildVector.push_back(IV);
|
||||
BuildVectorOpds.push_back(IV->getInsertedValueOperand());
|
||||
V = IV->getAggregateOperand();
|
||||
if (isa<UndefValue>(V))
|
||||
@ -5530,7 +5492,6 @@ static bool findBuildAggregate(InsertValueInst *IV,
|
||||
if (!IV || !IV->hasOneUse())
|
||||
return false;
|
||||
} while (true);
|
||||
std::reverse(BuildVector.begin(), BuildVector.end());
|
||||
std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
|
||||
return true;
|
||||
}
|
||||
@ -5706,27 +5667,25 @@ bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
|
||||
if (!R.canMapToVector(IVI->getType(), DL))
|
||||
return false;
|
||||
|
||||
SmallVector<Value *, 16> BuildVector;
|
||||
SmallVector<Value *, 16> BuildVectorOpds;
|
||||
if (!findBuildAggregate(IVI, BuildVector, BuildVectorOpds))
|
||||
if (!findBuildAggregate(IVI, BuildVectorOpds))
|
||||
return false;
|
||||
|
||||
DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
|
||||
// Aggregate value is unlikely to be processed in vector register, we need to
|
||||
// extract scalars into scalar registers, so NeedExtraction is set true.
|
||||
return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false, true);
|
||||
return tryToVectorizeList(BuildVectorOpds, R);
|
||||
}
|
||||
|
||||
bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
|
||||
BasicBlock *BB, BoUpSLP &R) {
|
||||
SmallVector<Value *, 16> BuildVector;
|
||||
SmallVector<Value *, 16> BuildVectorOpds;
|
||||
if (!findBuildVector(IEI, BuildVector, BuildVectorOpds))
|
||||
if (!findBuildVector(IEI, BuildVectorOpds))
|
||||
return false;
|
||||
|
||||
// Vectorize starting with the build vector operands ignoring the BuildVector
|
||||
// instructions for the purpose of scheduling and user extraction.
|
||||
return tryToVectorizeList(BuildVectorOpds, R, BuildVector);
|
||||
return tryToVectorizeList(BuildVectorOpds, R);
|
||||
}
|
||||
|
||||
bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB,
|
||||
@ -5804,8 +5763,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
|
||||
// is done when there are exactly two elements since tryToVectorizeList
|
||||
// asserts that there are only two values when AllowReorder is true.
|
||||
bool AllowReorder = NumElts == 2;
|
||||
if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R,
|
||||
None, AllowReorder)) {
|
||||
if (NumElts > 1 &&
|
||||
tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, AllowReorder)) {
|
||||
// Success start over because instructions might have been changed.
|
||||
HaveVectorizedPhiNodes = true;
|
||||
Changed = true;
|
||||
|
61
test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir
Normal file
61
test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir
Normal file
@ -0,0 +1,61 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=aarch64-linux-gnu -code-model=large -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s
|
||||
--- |
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
|
||||
@foo1 = common global [1073741824 x i32] zeroinitializer, align 4
|
||||
@foo2 = common global [1073741824 x i32] zeroinitializer, align 4
|
||||
|
||||
define i32 @gv_large() {
|
||||
entry:
|
||||
%retval = alloca i32, align 4
|
||||
store i32 0, i32* %retval, align 4
|
||||
%0 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0), align 4
|
||||
%1 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0), align 4
|
||||
%add = add nsw i32 %0, %1
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
...
|
||||
---
|
||||
name: gv_large
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
stack:
|
||||
- { id: 0, name: retval, type: default, offset: 0, size: 4, alignment: 4,
|
||||
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
|
||||
di-variable: '', di-expression: '', di-location: '' }
|
||||
constants:
|
||||
body: |
|
||||
bb.1:
|
||||
; CHECK-LABEL: name: gv_large
|
||||
; CHECK: [[MOVZXi:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @foo1, 0
|
||||
; CHECK: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi]], target-flags(aarch64-g1, aarch64-nc) @foo1, 16
|
||||
; CHECK: [[MOVKXi1:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi]], target-flags(aarch64-g2, aarch64-nc) @foo1, 32
|
||||
; CHECK: [[MOVKXi2:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi1]], target-flags(aarch64-g3) @foo1, 48
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY [[MOVKXi2]]
|
||||
; CHECK: [[MOVZXi1:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @foo2, 0
|
||||
; CHECK: [[MOVKXi3:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi1]], target-flags(aarch64-g1, aarch64-nc) @foo2, 16
|
||||
; CHECK: [[MOVKXi4:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi3]], target-flags(aarch64-g2, aarch64-nc) @foo2, 32
|
||||
; CHECK: [[MOVKXi5:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi4]], target-flags(aarch64-g3) @foo2, 48
|
||||
; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[MOVKXi5]]
|
||||
; CHECK: STRWui %wzr, %stack.0.retval, 0 :: (store 4 into %ir.retval)
|
||||
; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`)
|
||||
; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`)
|
||||
; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRWui]], [[LDRWui1]]
|
||||
; CHECK: %w0 = COPY [[ADDWrr]]
|
||||
; CHECK: RET_ReallyLR implicit %w0
|
||||
%1:gpr(s32) = G_CONSTANT i32 0
|
||||
%4:gpr(p0) = G_GLOBAL_VALUE @foo1
|
||||
%3:gpr(p0) = COPY %4(p0)
|
||||
%7:gpr(p0) = G_GLOBAL_VALUE @foo2
|
||||
%6:gpr(p0) = COPY %7(p0)
|
||||
%0:gpr(p0) = G_FRAME_INDEX %stack.0.retval
|
||||
G_STORE %1(s32), %0(p0) :: (store 4 into %ir.retval)
|
||||
%2:gpr(s32) = G_LOAD %3(p0) :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`)
|
||||
%5:gpr(s32) = G_LOAD %6(p0) :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`)
|
||||
%8:gpr(s32) = G_ADD %2, %5
|
||||
%w0 = COPY %8(s32)
|
||||
RET_ReallyLR implicit %w0
|
||||
|
||||
...
|
@ -629,14 +629,29 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
|
||||
|
||||
; CHECK-NOT: dmb
|
||||
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
|
||||
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
|
||||
|
||||
; CHECK: casab w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]]
|
||||
; CHECK-NOT: dmb
|
||||
; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
|
||||
; CHECK-NEXT: casab w0, w1, [x[[ADDR]]]
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
ret i8 %old
|
||||
}
|
||||
|
||||
define i1 @test_atomic_cmpxchg_i8_1(i8 %wanted, i8 %new) nounwind {
|
||||
; CHECK-LABEL: test_atomic_cmpxchg_i8_1:
|
||||
%pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
|
||||
%success = extractvalue { i8, i1 } %pair, 1
|
||||
|
||||
; CHECK-NOT: dmb
|
||||
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
|
||||
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
|
||||
|
||||
; CHECK: casab w[[NEW:[0-9]+]], w1, [x[[ADDR]]]
|
||||
; CHECK-NEXT: cmp w[[NEW]], w0, uxtb
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
; CHECK-NEXT: ret
|
||||
ret i1 %success
|
||||
}
|
||||
|
||||
define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
|
||||
; CHECK-LABEL: test_atomic_cmpxchg_i16:
|
||||
%pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire
|
||||
@ -644,14 +659,30 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
|
||||
|
||||
; CHECK-NOT: dmb
|
||||
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
|
||||
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
|
||||
|
||||
; CHECK: casah w0, w1, [x[[ADDR]]]
|
||||
; CHECK-NOT: dmb
|
||||
; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
|
||||
; CHECK-NEXT: casah w0, w1, [x[[ADDR]]]
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
ret i16 %old
|
||||
}
|
||||
|
||||
define i1 @test_atomic_cmpxchg_i16_1(i16 %wanted, i16 %new) nounwind {
|
||||
; CHECK-LABEL: test_atomic_cmpxchg_i16_1:
|
||||
%pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire
|
||||
%success = extractvalue { i16, i1 } %pair, 1
|
||||
|
||||
; CHECK-NOT: dmb
|
||||
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
|
||||
; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
|
||||
|
||||
; CHECK: casah w[[NEW:[0-9]+]], w1, [x[[ADDR]]]
|
||||
; CHECK-NEXT: cmp w[[NEW]], w0, uxth
|
||||
; CHECK-NEXT: cset w0, eq
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
ret i1 %success
|
||||
}
|
||||
|
||||
define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
|
||||
; CHECK-LABEL: test_atomic_cmpxchg_i32:
|
||||
%pair = cmpxchg i32* @var32, i32 %wanted, i32 %new acquire acquire
|
||||
|
@ -66,9 +66,10 @@ ENDIF: ; preds = %LOOP
|
||||
|
||||
; OPT-LABEL: define amdgpu_kernel void @multi_if_break_loop(
|
||||
; OPT: llvm.amdgcn.break
|
||||
; OPT: llvm.amdgcn.break
|
||||
; OPT: llvm.amdgcn.if.break
|
||||
; OPT: llvm.amdgcn.if.break
|
||||
; OPT: llvm.amdgcn.loop
|
||||
; OPT: llvm.amdgcn.if.break
|
||||
; OPT: llvm.amdgcn.if.break
|
||||
; OPT: llvm.amdgcn.end.cf
|
||||
|
||||
; GCN-LABEL: {{^}}multi_if_break_loop:
|
||||
|
@ -124,55 +124,100 @@ bb23: ; preds = %bb10
|
||||
; Earlier version of above, before a run of the structurizer.
|
||||
; IR-LABEL: @nested_loop_conditions(
|
||||
|
||||
; IR: Flow7:
|
||||
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %17)
|
||||
; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %15)
|
||||
; IR-NEXT: %1 = extractvalue { i1, i64 } %0, 0
|
||||
; IR-NEXT: %2 = extractvalue { i1, i64 } %0, 1
|
||||
; IR-NEXT: br i1 %1, label %bb4.bb13_crit_edge, label %Flow8
|
||||
; IR: %tmp1235 = icmp slt i32 %tmp1134, 9
|
||||
; IR: br i1 %tmp1235, label %bb14.lr.ph, label %Flow
|
||||
|
||||
; IR: Flow1:
|
||||
; IR-NEXT: %loop.phi = phi i64 [ %loop.phi9, %Flow6 ], [ %phi.broken, %bb14 ]
|
||||
; IR-NEXT: %13 = phi <4 x i32> [ %29, %Flow6 ], [ undef, %bb14 ]
|
||||
; IR-NEXT: %14 = phi i32 [ %30, %Flow6 ], [ undef, %bb14 ]
|
||||
; IR-NEXT: %15 = phi i1 [ %31, %Flow6 ], [ false, %bb14 ]
|
||||
; IR-NEXT: %16 = phi i1 [ false, %Flow6 ], [ %8, %bb14 ]
|
||||
; IR-NEXT: %17 = call i64 @llvm.amdgcn.else.break(i64 %11, i64 %loop.phi)
|
||||
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11)
|
||||
; IR-NEXT: %18 = call i1 @llvm.amdgcn.loop(i64 %17)
|
||||
; IR-NEXT: br i1 %18, label %Flow7, label %bb14
|
||||
|
||||
; IR: Flow2:
|
||||
; IR-NEXT: %loop.phi10 = phi i64 [ %loop.phi11, %Flow5 ], [ %12, %bb16 ]
|
||||
; IR-NEXT: %19 = phi <4 x i32> [ %29, %Flow5 ], [ undef, %bb16 ]
|
||||
; IR-NEXT: %20 = phi i32 [ %30, %Flow5 ], [ undef, %bb16 ]
|
||||
; IR-NEXT: %21 = phi i1 [ %31, %Flow5 ], [ false, %bb16 ]
|
||||
; IR-NEXT: %22 = phi i1 [ false, %Flow5 ], [ false, %bb16 ]
|
||||
; IR-NEXT: %23 = phi i1 [ false, %Flow5 ], [ %8, %bb16 ]
|
||||
; IR-NEXT: %24 = call { i1, i64 } @llvm.amdgcn.if(i1 %23)
|
||||
; IR-NEXT: %25 = extractvalue { i1, i64 } %24, 0
|
||||
; IR-NEXT: %26 = extractvalue { i1, i64 } %24, 1
|
||||
; IR-NEXT: br i1 %25, label %bb21, label %Flow3
|
||||
|
||||
; IR: bb21:
|
||||
; IR: %tmp12 = icmp slt i32 %tmp11, 9
|
||||
; IR-NEXT: %27 = xor i1 %tmp12, true
|
||||
; IR-NEXT: %28 = call i64 @llvm.amdgcn.if.break(i1 %27, i64 %phi.broken)
|
||||
; IR-NEXT: br label %Flow3
|
||||
; IR: bb14.lr.ph:
|
||||
; IR: br label %bb14
|
||||
|
||||
; IR: Flow3:
|
||||
; IR-NEXT: %loop.phi11 = phi i64 [ %phi.broken, %bb21 ], [ %phi.broken, %Flow2 ]
|
||||
; IR-NEXT: %loop.phi9 = phi i64 [ %28, %bb21 ], [ %loop.phi10, %Flow2 ]
|
||||
; IR-NEXT: %29 = phi <4 x i32> [ %tmp9, %bb21 ], [ %19, %Flow2 ]
|
||||
; IR-NEXT: %30 = phi i32 [ %tmp10, %bb21 ], [ %20, %Flow2 ]
|
||||
; IR-NEXT: %31 = phi i1 [ %27, %bb21 ], [ %21, %Flow2 ]
|
||||
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %26)
|
||||
; IR-NEXT: br i1 %22, label %bb31.loopexit, label %Flow4
|
||||
; IR: call void @llvm.amdgcn.end.cf(i64 %18)
|
||||
; IR: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %17)
|
||||
; IR: %1 = extractvalue { i1, i64 } %0, 0
|
||||
; IR: %2 = extractvalue { i1, i64 } %0, 1
|
||||
; IR: br i1 %1, label %bb4.bb13_crit_edge, label %Flow4
|
||||
|
||||
; IR: bb4.bb13_crit_edge:
|
||||
; IR: br label %Flow4
|
||||
|
||||
; IR: Flow4:
|
||||
; IR: %3 = phi i1 [ true, %bb4.bb13_crit_edge ], [ false, %Flow3 ]
|
||||
; IR: call void @llvm.amdgcn.end.cf(i64 %2)
|
||||
; IR: br label %Flow
|
||||
|
||||
; IR: bb13:
|
||||
; IR: br label %bb31
|
||||
|
||||
; IR: Flow:
|
||||
; IR: %4 = phi i1 [ %3, %Flow4 ], [ true, %bb ]
|
||||
; IR: %5 = call { i1, i64 } @llvm.amdgcn.if(i1 %4)
|
||||
; IR: %6 = extractvalue { i1, i64 } %5, 0
|
||||
; IR: %7 = extractvalue { i1, i64 } %5, 1
|
||||
; IR: br i1 %6, label %bb13, label %bb31
|
||||
|
||||
; IR: bb14:
|
||||
; IR: %phi.broken = phi i64 [ %18, %Flow2 ], [ 0, %bb14.lr.ph ]
|
||||
; IR: %tmp1037 = phi i32 [ %tmp1033, %bb14.lr.ph ], [ %16, %Flow2 ]
|
||||
; IR: %tmp936 = phi <4 x i32> [ %tmp932, %bb14.lr.ph ], [ %15, %Flow2 ]
|
||||
; IR: %tmp15 = icmp eq i32 %tmp1037, 1
|
||||
; IR: %8 = xor i1 %tmp15, true
|
||||
; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8)
|
||||
; IR: %10 = extractvalue { i1, i64 } %9, 0
|
||||
; IR: %11 = extractvalue { i1, i64 } %9, 1
|
||||
; IR: br i1 %10, label %bb31.loopexit, label %Flow1
|
||||
|
||||
; IR: Flow1:
|
||||
; IR: %12 = call { i1, i64 } @llvm.amdgcn.else(i64 %11)
|
||||
; IR: %13 = extractvalue { i1, i64 } %12, 0
|
||||
; IR: %14 = extractvalue { i1, i64 } %12, 1
|
||||
; IR: br i1 %13, label %bb16, label %Flow2
|
||||
|
||||
; IR: bb16:
|
||||
; IR: %tmp17 = bitcast i64 %tmp3 to <2 x i32>
|
||||
; IR: br label %bb18
|
||||
|
||||
; IR: Flow2:
|
||||
; IR: %loop.phi = phi i64 [ %21, %bb21 ], [ %phi.broken, %Flow1 ]
|
||||
; IR: %15 = phi <4 x i32> [ %tmp9, %bb21 ], [ undef, %Flow1 ]
|
||||
; IR: %16 = phi i32 [ %tmp10, %bb21 ], [ undef, %Flow1 ]
|
||||
; IR: %17 = phi i1 [ %20, %bb21 ], [ false, %Flow1 ]
|
||||
; IR: %18 = call i64 @llvm.amdgcn.else.break(i64 %14, i64 %loop.phi)
|
||||
; IR: call void @llvm.amdgcn.end.cf(i64 %14)
|
||||
; IR: %19 = call i1 @llvm.amdgcn.loop(i64 %18)
|
||||
; IR: br i1 %19, label %Flow3, label %bb14
|
||||
|
||||
; IR: bb18:
|
||||
; IR: %tmp19 = load volatile i32, i32 addrspace(1)* undef
|
||||
; IR: %tmp20 = icmp slt i32 %tmp19, 9
|
||||
; IR: br i1 %tmp20, label %bb21, label %bb18
|
||||
|
||||
; IR: bb21:
|
||||
; IR: %tmp22 = extractelement <2 x i32> %tmp17, i64 1
|
||||
; IR: %tmp23 = lshr i32 %tmp22, 16
|
||||
; IR: %tmp24 = select i1 undef, i32 undef, i32 %tmp23
|
||||
; IR: %tmp25 = uitofp i32 %tmp24 to float
|
||||
; IR: %tmp26 = fmul float %tmp25, 0x3EF0001000000000
|
||||
; IR: %tmp27 = fsub float %tmp26, undef
|
||||
; IR: %tmp28 = fcmp olt float %tmp27, 5.000000e-01
|
||||
; IR: %tmp29 = select i1 %tmp28, i64 1, i64 2
|
||||
; IR: %tmp30 = extractelement <4 x i32> %tmp936, i64 %tmp29
|
||||
; IR: %tmp7 = zext i32 %tmp30 to i64
|
||||
; IR: %tmp8 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* undef, i64 %tmp7
|
||||
; IR: %tmp9 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp8, align 16
|
||||
; IR: %tmp10 = extractelement <4 x i32> %tmp9, i64 0
|
||||
; IR: %tmp11 = load volatile i32, i32 addrspace(1)* undef
|
||||
; IR: %tmp12 = icmp slt i32 %tmp11, 9
|
||||
; IR: %20 = xor i1 %tmp12, true
|
||||
; IR: %21 = call i64 @llvm.amdgcn.if.break(i1 %20, i64 %phi.broken)
|
||||
; IR: br label %Flow2
|
||||
|
||||
; IR: bb31.loopexit:
|
||||
; IR: br label %Flow1
|
||||
|
||||
; IR: bb31:
|
||||
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %7)
|
||||
; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef
|
||||
; IR-NEXT: ret void
|
||||
; IR: call void @llvm.amdgcn.end.cf(i64 %7)
|
||||
; IR: store volatile i32 0, i32 addrspace(1)* undef
|
||||
; IR: ret void
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}nested_loop_conditions:
|
||||
|
@ -852,8 +852,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
|
||||
; ARM: @ %bb.0: @ %entry
|
||||
; ARM-NEXT: ldrb r0, [r0]
|
||||
; ARM-NEXT: uxtb r2, r2
|
||||
; ARM-NEXT: and r0, r0, r1
|
||||
; ARM-NEXT: uxtb r1, r0
|
||||
; ARM-NEXT: and r1, r0, r1
|
||||
; ARM-NEXT: mov r0, #0
|
||||
; ARM-NEXT: cmp r1, r2
|
||||
; ARM-NEXT: movweq r0, #1
|
||||
@ -863,8 +862,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
|
||||
; ARMEB: @ %bb.0: @ %entry
|
||||
; ARMEB-NEXT: ldrb r0, [r0]
|
||||
; ARMEB-NEXT: uxtb r2, r2
|
||||
; ARMEB-NEXT: and r0, r0, r1
|
||||
; ARMEB-NEXT: uxtb r1, r0
|
||||
; ARMEB-NEXT: and r1, r0, r1
|
||||
; ARMEB-NEXT: mov r0, #0
|
||||
; ARMEB-NEXT: cmp r1, r2
|
||||
; ARMEB-NEXT: movweq r0, #1
|
||||
@ -872,9 +870,8 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
|
||||
;
|
||||
; THUMB1-LABEL: test6:
|
||||
; THUMB1: @ %bb.0: @ %entry
|
||||
; THUMB1-NEXT: ldrb r0, [r0]
|
||||
; THUMB1-NEXT: ands r0, r1
|
||||
; THUMB1-NEXT: uxtb r3, r0
|
||||
; THUMB1-NEXT: ldrb r3, [r0]
|
||||
; THUMB1-NEXT: ands r3, r1
|
||||
; THUMB1-NEXT: uxtb r2, r2
|
||||
; THUMB1-NEXT: movs r0, #1
|
||||
; THUMB1-NEXT: movs r1, #0
|
||||
@ -889,8 +886,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) {
|
||||
; THUMB2: @ %bb.0: @ %entry
|
||||
; THUMB2-NEXT: ldrb r0, [r0]
|
||||
; THUMB2-NEXT: uxtb r2, r2
|
||||
; THUMB2-NEXT: ands r0, r1
|
||||
; THUMB2-NEXT: uxtb r1, r0
|
||||
; THUMB2-NEXT: ands r1, r0
|
||||
; THUMB2-NEXT: movs r0, #0
|
||||
; THUMB2-NEXT: cmp r1, r2
|
||||
; THUMB2-NEXT: it eq
|
||||
|
@ -49,9 +49,10 @@ entry:
|
||||
; CHECK-THUMBV6: mov [[EXPECTED:r[0-9]+]], r1
|
||||
; CHECK-THUMBV6-NEXT: bl __sync_val_compare_and_swap_1
|
||||
; CHECK-THUMBV6-NEXT: mov [[RES:r[0-9]+]], r0
|
||||
; CHECK-THUMBV6-NEXT: uxtb [[EXPECTED_ZEXT:r[0-9]+]], [[EXPECTED]]
|
||||
; CHECK-THUMBV6-NEXT: movs r0, #1
|
||||
; CHECK-THUMBV6-NEXT: movs [[ZERO:r[0-9]+]], #0
|
||||
; CHECK-THUMBV6-NEXT: cmp [[RES]], [[EXPECTED]]
|
||||
; CHECK-THUMBV6-NEXT: cmp [[RES]], [[EXPECTED_ZEXT]]
|
||||
; CHECK-THUMBV6-NEXT: beq [[END:.LBB[0-9_]+]]
|
||||
; CHECK-THUMBV6-NEXT: mov r0, [[ZERO]]
|
||||
; CHECK-THUMBV6-NEXT: [[END]]:
|
||||
|
@ -17,7 +17,8 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
|
||||
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
|
||||
; CHECK: bne [[RETRY]]
|
||||
; CHECK: [[DONE]]:
|
||||
; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]]
|
||||
; CHECK: uxtb [[DESIRED_ZEXT:r[0-9]+]], [[DESIRED]]
|
||||
; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED_ZEXT]]
|
||||
; CHECK: {{moveq|movweq}} {{r[0-9]+}}, #1
|
||||
; CHECK: dmb ish
|
||||
%res = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst monotonic
|
||||
@ -36,7 +37,8 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind
|
||||
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
|
||||
; CHECK: bne [[RETRY]]
|
||||
; CHECK: [[DONE]]:
|
||||
; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]]
|
||||
; CHECK: uxth [[DESIRED_ZEXT:r[0-9]+]], [[DESIRED]]
|
||||
; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED_ZEXT]]
|
||||
; CHECK: {{moveq|movweq}} {{r[0-9]+}}, #1
|
||||
; CHECK: dmb ish
|
||||
%res = cmpxchg i16* %addr, i16 %desired, i16 %new seq_cst monotonic
|
||||
|
15
test/CodeGen/ARM/global-merge-dllexport.ll
Normal file
15
test/CodeGen/ARM/global-merge-dllexport.ll
Normal file
@ -0,0 +1,15 @@
|
||||
; RUN: llc < %s -mtriple=thumbv7-win32 -arm-global-merge | FileCheck %s
|
||||
|
||||
@x = global i32 0, align 4
|
||||
@y = dllexport global i32 0, align 4
|
||||
|
||||
define void @f1(i32 %a1, i32 %a2) {
|
||||
; CHECK: f1:
|
||||
; CHECK: movw [[REG1:r[0-9]+]], :lower16:x
|
||||
; CHECK: movt [[REG1]], :upper16:x
|
||||
store i32 %a1, i32* @x, align 4
|
||||
store i32 %a2, i32* @y, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NOT: .L_MergedGlobals
|
@ -1,8 +1,9 @@
|
||||
; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge | FileCheck %s --check-prefix=CHECK-MERGE
|
||||
; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=true | FileCheck %s --check-prefix=CHECK-MERGE
|
||||
; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=false | FileCheck %s --check-prefix=CHECK-NO-MERGE
|
||||
; RUN: llc < %s -mtriple=arm-macho -arm-global-merge | FileCheck %s --check-prefix=CHECK-NO-MERGE
|
||||
; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -relocation-model=pic | FileCheck %s --check-prefix=CHECK-NO-MERGE
|
||||
; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge | FileCheck %s --check-prefixes=CHECK,CHECK-MERGE
|
||||
; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=true | FileCheck %s --check-prefixes=CHECK,CHECK-MERGE
|
||||
; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=false | FileCheck %s --check-prefixes=CHECK,CHECK-NO-MERGE
|
||||
; RUN: llc < %s -mtriple=arm-macho -arm-global-merge | FileCheck %s --check-prefixes=CHECK,CHECK-NO-MERGE
|
||||
; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -relocation-model=pic | FileCheck %s --check-prefixes=CHECK,CHECK-NO-MERGE
|
||||
; RUN: llc < %s -mtriple=thumbv7-win32 -arm-global-merge | FileCheck %s --check-prefixes=CHECK-WIN32
|
||||
|
||||
@x = global i32 0, align 4
|
||||
@y = global i32 0, align 4
|
||||
@ -10,10 +11,13 @@
|
||||
|
||||
define void @f1(i32 %a1, i32 %a2) {
|
||||
;CHECK: f1:
|
||||
;CHECK: ldr {{r[0-9]+}}, [[LABEL1:\.LCPI[0-9]+_[0-9]]]
|
||||
;CHECK: ldr {{r[0-9]+}}, [[LABEL1:\.?LCPI[0-9]+_[0-9]]]
|
||||
;CHECK: [[LABEL1]]:
|
||||
;CHECK-MERGE: .long .L_MergedGlobals
|
||||
;CHECK-NO-MERGE: .long {{_?x}}
|
||||
;CHECK-WIN32: f1:
|
||||
;CHECK-WIN32: movw [[REG1:r[0-9]+]], :lower16:.L_MergedGlobals
|
||||
;CHECK-WIN32: movt [[REG1]], :upper16:.L_MergedGlobals
|
||||
store i32 %a1, i32* @x, align 4
|
||||
store i32 %a2, i32* @y, align 4
|
||||
ret void
|
||||
@ -21,10 +25,13 @@ define void @f1(i32 %a1, i32 %a2) {
|
||||
|
||||
define void @g1(i32 %a1, i32 %a2) {
|
||||
;CHECK: g1:
|
||||
;CHECK: ldr {{r[0-9]+}}, [[LABEL2:\.LCPI[0-9]+_[0-9]]]
|
||||
;CHECK: ldr {{r[0-9]+}}, [[LABEL2:\.?LCPI[0-9]+_[0-9]]]
|
||||
;CHECK: [[LABEL2]]:
|
||||
;CHECK-MERGE: .long .L_MergedGlobals
|
||||
;CHECK-NO-MERGE: .long {{_?y}}
|
||||
;CHECK-WIN32: g1:
|
||||
;CHECK-WIN32: movw [[REG2:r[0-9]+]], :lower16:.L_MergedGlobals
|
||||
;CHECK-WIN32: movt [[REG2]], :upper16:.L_MergedGlobals
|
||||
store i32 %a1, i32* @y, align 4
|
||||
store i32 %a2, i32* @z, align 4
|
||||
ret void
|
||||
@ -35,6 +42,7 @@ define void @g1(i32 %a1, i32 %a2) {
|
||||
;CHECK-MERGE: .type .L_MergedGlobals,%object
|
||||
;CHECK-MERGE: .local .L_MergedGlobals
|
||||
;CHECK-MERGE: .comm .L_MergedGlobals,12,4
|
||||
;CHECK-WIN32: .lcomm .L_MergedGlobals,12,4
|
||||
|
||||
;CHECK-MERGE: .globl x
|
||||
;CHECK-MERGE: x = .L_MergedGlobals
|
||||
@ -45,3 +53,10 @@ define void @g1(i32 %a1, i32 %a2) {
|
||||
;CHECK-MERGE: .globl z
|
||||
;CHECK-MERGE: z = .L_MergedGlobals+8
|
||||
;CHECK-MERGE: .size z, 4
|
||||
|
||||
;CHECK-WIN32: .globl x
|
||||
;CHECK-WIN32: x = .L_MergedGlobals
|
||||
;CHECK-WIN32: .globl y
|
||||
;CHECK-WIN32: y = .L_MergedGlobals+4
|
||||
;CHECK-WIN32: .globl z
|
||||
;CHECK-WIN32: z = .L_MergedGlobals+8
|
||||
|
67
test/CodeGen/ARM/peephole-phi.mir
Normal file
67
test/CodeGen/ARM/peephole-phi.mir
Normal file
@ -0,0 +1,67 @@
|
||||
# RUN: llc -o - %s -mtriple=armv7-- -verify-machineinstrs -run-pass=peephole-opt | FileCheck %s
|
||||
#
|
||||
# Make sure we do not crash on this input.
|
||||
# Note that this input could in principle be optimized, but right now we don't
|
||||
# have this case implemented so the output should simply be unchanged.
|
||||
#
|
||||
# CHECK-LABEL: name: func
|
||||
# CHECK: body: |
|
||||
# CHECK: bb.0:
|
||||
# CHECK: Bcc %bb.2, 1, undef %cpsr
|
||||
#
|
||||
# CHECK: bb.1:
|
||||
# CHECK: %0:dpr = IMPLICIT_DEF
|
||||
# CHECK: %1:gpr, %2:gpr = VMOVRRD %0, 14, %noreg
|
||||
# CHECK: B %bb.3
|
||||
#
|
||||
# CHECK: bb.2:
|
||||
# CHECK: %3:spr = IMPLICIT_DEF
|
||||
# CHECK: %4:gpr = VMOVRS %3, 14, %noreg
|
||||
#
|
||||
# CHECK: bb.3:
|
||||
# CHECK: %5:gpr = PHI %1, %bb.1, %4, %bb.2
|
||||
# CHECK: %6:spr = VMOVSR %5, 14, %noreg
|
||||
---
|
||||
name: func0
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
Bcc %bb.2, 1, undef %cpsr
|
||||
|
||||
bb.1:
|
||||
%0:dpr = IMPLICIT_DEF
|
||||
%1:gpr, %2:gpr = VMOVRRD %0:dpr, 14, %noreg
|
||||
B %bb.3
|
||||
|
||||
bb.2:
|
||||
%3:spr = IMPLICIT_DEF
|
||||
%4:gpr = VMOVRS %3:spr, 14, %noreg
|
||||
|
||||
bb.3:
|
||||
%5:gpr = PHI %1, %bb.1, %4, %bb.2
|
||||
%6:spr = VMOVSR %5, 14, %noreg
|
||||
...
|
||||
|
||||
# CHECK-LABEL: name: func1
|
||||
# CHECK: %6:spr = PHI %0, %bb.1, %2, %bb.2
|
||||
# CHECK: %7:spr = COPY %6
|
||||
---
|
||||
name: func1
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
Bcc %bb.2, 1, undef %cpsr
|
||||
|
||||
bb.1:
|
||||
%1:spr = IMPLICIT_DEF
|
||||
%0:gpr = VMOVRS %1, 14, %noreg
|
||||
B %bb.3
|
||||
|
||||
bb.2:
|
||||
%3:spr = IMPLICIT_DEF
|
||||
%2:gpr = VMOVRS %3:spr, 14, %noreg
|
||||
|
||||
bb.3:
|
||||
%4:gpr = PHI %0, %bb.1, %2, %bb.2
|
||||
%5:spr = VMOVSR %4, 14, %noreg
|
||||
...
|
94
test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
Normal file
94
test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
Normal file
@ -0,0 +1,94 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; Make sure that a negative value for the compare-and-swap is zero extended
|
||||
; from i8/i16 to i32 since it will be compared for equality.
|
||||
; RUN: llc -mtriple=powerpc64le-linux-gnu -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=powerpc64le-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-P7
|
||||
|
||||
@str = private unnamed_addr constant [46 x i8] c"FAILED: __atomic_compare_exchange_n() failed.\00"
|
||||
@str.1 = private unnamed_addr constant [59 x i8] c"FAILED: __atomic_compare_exchange_n() set the wrong value.\00"
|
||||
@str.2 = private unnamed_addr constant [7 x i8] c"PASSED\00"
|
||||
|
||||
define signext i32 @main() {
|
||||
; CHECK-LABEL: main:
|
||||
; CHECK: li 3, -32477
|
||||
; CHECK: lis 12, 0
|
||||
; CHECK: li 6, 234
|
||||
; CHECK: sth 3, 46(1)
|
||||
; CHECK: ori 4, 12, 33059
|
||||
; CHECK: sync
|
||||
; CHECK: .LBB0_1: # %L.entry
|
||||
; CHECK: lharx 3, 0, 5
|
||||
; CHECK: cmpw 4, 3
|
||||
; CHECK: bne 0, .LBB0_3
|
||||
; CHECK: sthcx. 6, 0, 5
|
||||
; CHECK: bne 0, .LBB0_1
|
||||
; CHECK: b .LBB0_4
|
||||
; CHECK: .LBB0_3: # %L.entry
|
||||
; CHECK: sthcx. 3, 0, 5
|
||||
; CHECK: .LBB0_4: # %L.entry
|
||||
; CHECK: cmplwi 3, 33059
|
||||
; CHECK: lwsync
|
||||
; CHECK: lhz 3, 46(1)
|
||||
; CHECK: cmplwi 3, 234
|
||||
;
|
||||
; CHECK-P7-LABEL: main:
|
||||
; CHECK-P7: lis 4, 0
|
||||
; CHECK-P7: li 7, 0
|
||||
; CHECK-P7: li 3, -32477
|
||||
; CHECK-P7: sth 3, 46(1)
|
||||
; CHECK-P7: li 5, 234
|
||||
; CHECK-P7: ori 4, 4, 33059
|
||||
; CHECK-P7: rlwinm 3, 6, 3, 27, 27
|
||||
; CHECK-P7: ori 7, 7, 65535
|
||||
; CHECK-P7: sync
|
||||
; CHECK-P7: slw 8, 5, 3
|
||||
; CHECK-P7: slw 5, 7, 3
|
||||
; CHECK-P7: slw 9, 4, 3
|
||||
; CHECK-P7: and 7, 8, 5
|
||||
; CHECK-P7: rldicr 4, 6, 0, 61
|
||||
; CHECK-P7: and 8, 9, 5
|
||||
; CHECK-P7: .LBB0_1: # %L.entry
|
||||
; CHECK-P7: lwarx 9, 0, 4
|
||||
; CHECK-P7: and 6, 9, 5
|
||||
; CHECK-P7: cmpw 0, 6, 8
|
||||
; CHECK-P7: bne 0, .LBB0_3
|
||||
; CHECK-P7: andc 9, 9, 5
|
||||
; CHECK-P7: or 9, 9, 7
|
||||
; CHECK-P7: stwcx. 9, 0, 4
|
||||
; CHECK-P7: bne 0, .LBB0_1
|
||||
; CHECK-P7: b .LBB0_4
|
||||
; CHECK-P7: .LBB0_3: # %L.entry
|
||||
; CHECK-P7: stwcx. 9, 0, 4
|
||||
; CHECK-P7: .LBB0_4: # %L.entry
|
||||
; CHECK-P7: srw 3, 6, 3
|
||||
; CHECK-P7: lwsync
|
||||
; CHECK-P7: cmplwi 3, 33059
|
||||
; CHECK-P7: lhz 3, 46(1)
|
||||
; CHECK-P7: cmplwi 3, 234
|
||||
L.entry:
|
||||
%value.addr = alloca i16, align 2
|
||||
store i16 -32477, i16* %value.addr, align 2
|
||||
%0 = cmpxchg i16* %value.addr, i16 -32477, i16 234 seq_cst seq_cst
|
||||
%1 = extractvalue { i16, i1 } %0, 1
|
||||
br i1 %1, label %L.B0000, label %L.B0003
|
||||
|
||||
L.B0003: ; preds = %L.entry
|
||||
%puts = call i32 @puts(i8* getelementptr inbounds ([46 x i8], [46 x i8]* @str, i64 0, i64 0))
|
||||
ret i32 1
|
||||
|
||||
L.B0000: ; preds = %L.entry
|
||||
%2 = load i16, i16* %value.addr, align 2
|
||||
%3 = icmp eq i16 %2, 234
|
||||
br i1 %3, label %L.B0001, label %L.B0005
|
||||
|
||||
L.B0005: ; preds = %L.B0000
|
||||
%puts1 = call i32 @puts(i8* getelementptr inbounds ([59 x i8], [59 x i8]* @str.1, i64 0, i64 0))
|
||||
ret i32 1
|
||||
|
||||
L.B0001: ; preds = %L.B0000
|
||||
%puts2 = call i32 @puts(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @str.2, i64 0, i64 0))
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare i32 @puts(i8* nocapture readonly) #0
|
@ -404,6 +404,7 @@ define void @test39() {
|
||||
define void @test40(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test40:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: b .LBB40_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB40_1:
|
||||
@ -423,6 +424,7 @@ define void @test40(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test41(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test41:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: .LBB41_1:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
@ -444,6 +446,7 @@ define void @test41(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test42(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test42:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: .LBB42_1:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
@ -465,6 +468,7 @@ define void @test42(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test43(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test43:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB43_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
@ -485,6 +489,7 @@ define void @test43(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test44(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test44:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB44_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
@ -505,6 +510,7 @@ define void @test44(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test45(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test45:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: .LBB45_1:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
@ -527,6 +533,7 @@ define void @test45(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test46(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test46:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: .LBB46_1:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
@ -549,6 +556,7 @@ define void @test46(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test47(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test47:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: sync
|
||||
; PPC64LE-NEXT: .LBB47_1:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
@ -571,6 +579,7 @@ define void @test47(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test48(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test48:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: sync
|
||||
; PPC64LE-NEXT: .LBB48_1:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
@ -593,6 +602,7 @@ define void @test48(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test49(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test49:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: sync
|
||||
; PPC64LE-NEXT: .LBB49_1:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
@ -615,6 +625,7 @@ define void @test49(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test50(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test50:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: b .LBB50_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB50_1:
|
||||
@ -634,6 +645,7 @@ define void @test50(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test51(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test51:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: .LBB51_1:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
@ -655,6 +667,7 @@ define void @test51(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test52(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test52:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: .LBB52_1:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
@ -676,6 +689,7 @@ define void @test52(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test53(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test53:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB53_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
@ -696,6 +710,7 @@ define void @test53(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test54(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test54:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB54_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
@ -716,6 +731,7 @@ define void @test54(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test55(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test55:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: .LBB55_1:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
@ -738,6 +754,7 @@ define void @test55(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test56(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test56:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: .LBB56_1:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
@ -760,6 +777,7 @@ define void @test56(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test57(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test57:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: sync
|
||||
; PPC64LE-NEXT: .LBB57_1:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
@ -782,6 +800,7 @@ define void @test57(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test58(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test58:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: sync
|
||||
; PPC64LE-NEXT: .LBB58_1:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
@ -804,6 +823,7 @@ define void @test58(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test59(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test59:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: sync
|
||||
; PPC64LE-NEXT: .LBB59_1:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
@ -1248,6 +1268,7 @@ define void @test79(i64* %ptr, i64 %cmp, i64 %val) {
|
||||
define void @test80(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test80:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: b .LBB80_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB80_1:
|
||||
@ -1267,6 +1288,7 @@ define void @test80(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test81(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test81:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: .LBB81_1:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
@ -1288,6 +1310,7 @@ define void @test81(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test82(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test82:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: .LBB82_1:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
@ -1309,6 +1332,7 @@ define void @test82(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test83(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test83:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB83_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
@ -1329,6 +1353,7 @@ define void @test83(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test84(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test84:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB84_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
@ -1349,6 +1374,7 @@ define void @test84(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test85(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test85:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: .LBB85_1:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
@ -1371,6 +1397,7 @@ define void @test85(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test86(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test86:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: .LBB86_1:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
@ -1393,6 +1420,7 @@ define void @test86(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test87(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test87:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: sync
|
||||
; PPC64LE-NEXT: .LBB87_1:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
@ -1415,6 +1443,7 @@ define void @test87(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test88(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test88:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: sync
|
||||
; PPC64LE-NEXT: .LBB88_1:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
@ -1437,6 +1466,7 @@ define void @test88(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test89(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
; PPC64LE-LABEL: test89:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31
|
||||
; PPC64LE-NEXT: sync
|
||||
; PPC64LE-NEXT: .LBB89_1:
|
||||
; PPC64LE-NEXT: lbarx 6, 0, 3
|
||||
@ -1459,6 +1489,7 @@ define void @test89(i8* %ptr, i8 %cmp, i8 %val) {
|
||||
define void @test90(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test90:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: b .LBB90_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
; PPC64LE-NEXT: .LBB90_1:
|
||||
@ -1478,6 +1509,7 @@ define void @test90(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test91(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test91:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: .LBB91_1:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
@ -1499,6 +1531,7 @@ define void @test91(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test92(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test92:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: .LBB92_1:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
; PPC64LE-NEXT: cmpw 4, 6
|
||||
@ -1520,6 +1553,7 @@ define void @test92(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test93(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test93:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB93_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
@ -1540,6 +1574,7 @@ define void @test93(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test94(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test94:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: b .LBB94_2
|
||||
; PPC64LE-NEXT: .p2align 5
|
||||
@ -1560,6 +1595,7 @@ define void @test94(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test95(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test95:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: .LBB95_1:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
@ -1582,6 +1618,7 @@ define void @test95(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test96(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test96:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: lwsync
|
||||
; PPC64LE-NEXT: .LBB96_1:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
@ -1604,6 +1641,7 @@ define void @test96(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test97(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test97:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: sync
|
||||
; PPC64LE-NEXT: .LBB97_1:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
@ -1626,6 +1664,7 @@ define void @test97(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test98(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test98:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: sync
|
||||
; PPC64LE-NEXT: .LBB98_1:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
@ -1648,6 +1687,7 @@ define void @test98(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
define void @test99(i16* %ptr, i16 %cmp, i16 %val) {
|
||||
; PPC64LE-LABEL: test99:
|
||||
; PPC64LE: # %bb.0:
|
||||
; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31
|
||||
; PPC64LE-NEXT: sync
|
||||
; PPC64LE-NEXT: .LBB99_1:
|
||||
; PPC64LE-NEXT: lharx 6, 0, 3
|
||||
|
@ -4780,3 +4780,42 @@ define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mem_mask1(<8 x doub
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; PR35977
|
||||
define void @test_zext_v8i8_to_v8i16(<8 x i8>* %arg, <8 x i16>* %arg1) {
|
||||
; CHECK-LABEL: test_zext_v8i8_to_v8i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
|
||||
; CHECK-NEXT: vpsllw $8, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovdqa %xmm0, (%rsi)
|
||||
; CHECK-NEXT: retq
|
||||
%tmp = getelementptr <8 x i8>, <8 x i8>* %arg, i32 0
|
||||
%tmp2 = load <8 x i8>, <8 x i8>* %tmp
|
||||
%tmp3 = extractelement <8 x i8> %tmp2, i32 0
|
||||
%tmp4 = zext i8 %tmp3 to i16
|
||||
%tmp5 = insertelement <8 x i16> undef, i16 %tmp4, i32 0
|
||||
%tmp6 = extractelement <8 x i8> %tmp2, i32 1
|
||||
%tmp7 = zext i8 %tmp6 to i16
|
||||
%tmp8 = insertelement <8 x i16> %tmp5, i16 %tmp7, i32 1
|
||||
%tmp9 = extractelement <8 x i8> %tmp2, i32 2
|
||||
%tmp10 = zext i8 %tmp9 to i16
|
||||
%tmp11 = insertelement <8 x i16> %tmp8, i16 %tmp10, i32 2
|
||||
%tmp12 = extractelement <8 x i8> %tmp2, i32 3
|
||||
%tmp13 = zext i8 %tmp12 to i16
|
||||
%tmp14 = insertelement <8 x i16> %tmp11, i16 %tmp13, i32 3
|
||||
%tmp15 = extractelement <8 x i8> %tmp2, i32 4
|
||||
%tmp16 = zext i8 %tmp15 to i16
|
||||
%tmp17 = insertelement <8 x i16> %tmp14, i16 %tmp16, i32 4
|
||||
%tmp18 = extractelement <8 x i8> %tmp2, i32 5
|
||||
%tmp19 = zext i8 %tmp18 to i16
|
||||
%tmp20 = insertelement <8 x i16> %tmp17, i16 %tmp19, i32 5
|
||||
%tmp21 = extractelement <8 x i8> %tmp2, i32 6
|
||||
%tmp22 = zext i8 %tmp21 to i16
|
||||
%tmp23 = insertelement <8 x i16> %tmp20, i16 %tmp22, i32 6
|
||||
%tmp24 = extractelement <8 x i8> %tmp2, i32 7
|
||||
%tmp25 = zext i8 %tmp24 to i16
|
||||
%tmp26 = insertelement <8 x i16> %tmp23, i16 %tmp25, i32 7
|
||||
%tmp27 = shl <8 x i16> %tmp26, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
|
||||
%tmp28 = getelementptr <8 x i16>, <8 x i16>* %arg1, i32 0
|
||||
store <8 x i16> %tmp27, <8 x i16>* %tmp28
|
||||
ret void
|
||||
}
|
||||
|
@ -1,10 +1,13 @@
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck -check-prefixes=CHECK,BZERO %s
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck -check-prefixes=CHECK,BZERO %s
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck -check-prefixes=CHECK,NOBZERO %s
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-ios10.0-simulator | FileCheck -check-prefixes=CHECK,NOBZERO %s
|
||||
|
||||
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
|
||||
|
||||
; CHECK-LABEL: foo:
|
||||
; CHECK: {{calll|callq}} ___bzero
|
||||
; BZERO: {{calll|callq}} ___bzero
|
||||
; NOBZERO-NOT: bzero
|
||||
define void @foo(i8* %p, i32 %len) {
|
||||
call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 %len, i32 1, i1 false)
|
||||
ret void
|
||||
|
@ -19,7 +19,8 @@ entry:
|
||||
%.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1
|
||||
ret { i64, i64 } %.fca.1.insert
|
||||
}
|
||||
; CHECK: lock cmpxchg16b
|
||||
; CHECK: lock
|
||||
; CHECK-NEXT: cmpxchg16b
|
||||
|
||||
attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind }
|
||||
|
36
test/CodeGen/X86/pr35761.ll
Normal file
36
test/CodeGen/X86/pr35761.ll
Normal file
@ -0,0 +1,36 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=x86_64-unknown-linux %s -o - | FileCheck %s
|
||||
|
||||
@x = global i8 0, align 1
|
||||
@y = global i32 0, align 4
|
||||
@z = global i24 0, align 4
|
||||
|
||||
define void @PR35761(i32 %call) {
|
||||
; CHECK-LABEL: PR35761:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movzbl {{.*}}(%rip), %eax
|
||||
; CHECK-NEXT: andl $1, %eax
|
||||
; CHECK-NEXT: movzbl {{.*}}(%rip), %ecx
|
||||
; CHECK-NEXT: xorl $255, %ecx
|
||||
; CHECK-NEXT: orl %eax, %ecx
|
||||
; CHECK-NEXT: movw %cx, {{.*}}(%rip)
|
||||
; CHECK-NEXT: movb $0, z+{{.*}}(%rip)
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%0 = load i8, i8* @x, align 1
|
||||
%tobool = trunc i8 %0 to i1
|
||||
%conv = zext i1 %tobool to i32
|
||||
%or = or i32 32767, %call
|
||||
%neg = xor i32 %or, -1
|
||||
%neg1 = xor i32 %neg, -1
|
||||
%1 = load i32, i32* @y, align 4
|
||||
%xor = xor i32 %neg1, %1
|
||||
%or2 = or i32 %conv, %xor
|
||||
%conv3 = trunc i32 %or2 to i8
|
||||
%bf.load = load i24, i24* @z, align 4
|
||||
%2 = zext i8 %conv3 to i24
|
||||
%bf.value = and i24 %2, 4194303
|
||||
store i24 %bf.value, i24* @z, align 2
|
||||
ret void
|
||||
}
|
||||
|
20
test/CodeGen/X86/pr35972.ll
Normal file
20
test/CodeGen/X86/pr35972.ll
Normal file
@ -0,0 +1,20 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=i686-unknown-linux-gnu %s -o - -mattr=avx512bw | FileCheck %s
|
||||
|
||||
define void @test3(i32 %c, <64 x i1>* %ptr) {
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: sbbl %ecx, %ecx
|
||||
; CHECK-NEXT: kmovd %ecx, %k0
|
||||
; CHECK-NEXT: kunpckdq %k0, %k0, %k0
|
||||
; CHECK-NEXT: kmovq %k0, (%eax)
|
||||
; CHECK-NEXT: retl
|
||||
%cmp = icmp eq i32 %c, 0
|
||||
%insert = insertelement <64 x i1> undef, i1 %cmp, i32 0
|
||||
%shuf = shufflevector <64 x i1> %insert, <64 x i1> undef, <64 x i32> zeroinitializer
|
||||
store <64 x i1> %shuf, <64 x i1>* %ptr
|
||||
ret void
|
||||
}
|
||||
|
42
test/CodeGen/X86/pr37563.ll
Normal file
42
test/CodeGen/X86/pr37563.ll
Normal file
@ -0,0 +1,42 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s
|
||||
|
||||
%struct.S = type <{ i16, i24, [5 x i8], i8, i16, [2 x i8] }>
|
||||
|
||||
@z = global { i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] } { i16 -724, i8 94, i8 -18, i8 5, i8 undef, i8 96, i8 104, i8 -24, i8 10, i8 0, [5 x i8] undef }, align 8
|
||||
@tf_3_var_136 = global i64 0, align 8
|
||||
@.str = private unnamed_addr constant [6 x i8] c"%llu\0A\00", align 1
|
||||
|
||||
define void @PR35763() {
|
||||
; CHECK-LABEL: PR35763:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movzwl {{.*}}(%rip), %eax
|
||||
; CHECK-NEXT: movzwl z+{{.*}}(%rip), %ecx
|
||||
; CHECK-NEXT: orl %eax, %ecx
|
||||
; CHECK-NEXT: movq %rcx, {{.*}}(%rip)
|
||||
; CHECK-NEXT: movl z+{{.*}}(%rip), %eax
|
||||
; CHECK-NEXT: movzbl z+{{.*}}(%rip), %ecx
|
||||
; CHECK-NEXT: shlq $32, %rcx
|
||||
; CHECK-NEXT: orq %rax, %rcx
|
||||
; CHECK-NEXT: movabsq $1090921758719, %rax # imm = 0xFE0000FFFF
|
||||
; CHECK-NEXT: andq %rcx, %rax
|
||||
; CHECK-NEXT: movl %eax, z+{{.*}}(%rip)
|
||||
; CHECK-NEXT: shrq $32, %rax
|
||||
; CHECK-NEXT: movb %al, z+{{.*}}(%rip)
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%0 = load i16, i16* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 0), align 8
|
||||
%conv = sext i16 %0 to i32
|
||||
%bf.load = load i32, i32* bitcast (i24* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 1) to i32*), align 2
|
||||
%bf.clear = and i32 %bf.load, 2097151
|
||||
%bf.cast = zext i32 %bf.clear to i64
|
||||
%conv1 = trunc i64 %bf.cast to i32
|
||||
%or = or i32 %conv, %conv1
|
||||
%conv2 = trunc i32 %or to i16
|
||||
%conv3 = zext i16 %conv2 to i64
|
||||
store i64 %conv3, i64* @tf_3_var_136, align 8
|
||||
%bf.load4 = load i40, i40* bitcast ([5 x i8]* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 2) to i40*), align 2
|
||||
%bf.clear5 = and i40 %bf.load4, -8589869057
|
||||
store i40 %bf.clear5, i40* bitcast ([5 x i8]* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 2) to i40*), align 2
|
||||
ret void
|
||||
}
|
@ -207,13 +207,12 @@ define <8 x i16> @var_shuffle_v8i16(<8 x i16> %v, <8 x i16> %indices) nounwind {
|
||||
define <16 x i8> @var_shuffle_v16i8(<16 x i8> %v, <16 x i8> %indices) nounwind {
|
||||
; SSSE3-LABEL: var_shuffle_v16i8:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: pshufb %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSSE3-NEXT: pshufb %xmm1, %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: var_shuffle_v16i8:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%index0 = extractelement <16 x i8> %indices, i32 0
|
||||
%index1 = extractelement <16 x i8> %indices, i32 1
|
||||
|
@ -1277,3 +1277,183 @@ define <8 x float> @var_shuffle_v8f32(<8 x float> %v, <8 x i32> %indices) nounwi
|
||||
%ret7 = insertelement <8 x float> %ret6, float %v7, i32 7
|
||||
ret <8 x float> %ret7
|
||||
}
|
||||
|
||||
define <8 x i32> @pr35820(<4 x i32> %v, <8 x i32> %indices) unnamed_addr nounwind {
|
||||
; AVX1-LABEL: pr35820:
|
||||
; AVX1: # %bb.0: # %entry
|
||||
; AVX1-NEXT: vpextrq $1, %xmm1, %r8
|
||||
; AVX1-NEXT: movq %r8, %r10
|
||||
; AVX1-NEXT: shrq $30, %r10
|
||||
; AVX1-NEXT: vmovq %xmm1, %r9
|
||||
; AVX1-NEXT: movq %r9, %rsi
|
||||
; AVX1-NEXT: shrq $30, %rsi
|
||||
; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX1-NEXT: andl $3, %r9d
|
||||
; AVX1-NEXT: andl $12, %esi
|
||||
; AVX1-NEXT: andl $3, %r8d
|
||||
; AVX1-NEXT: andl $12, %r10d
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
||||
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; AVX1-NEXT: movq %rax, %rdi
|
||||
; AVX1-NEXT: shrq $30, %rdi
|
||||
; AVX1-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX1-NEXT: movq %rcx, %rdx
|
||||
; AVX1-NEXT: shrq $30, %rdx
|
||||
; AVX1-NEXT: andl $3, %ecx
|
||||
; AVX1-NEXT: andl $12, %edx
|
||||
; AVX1-NEXT: andl $3, %eax
|
||||
; AVX1-NEXT: andl $12, %edi
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vpinsrd $1, -24(%rsp,%rdx), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpinsrd $2, -24(%rsp,%rax,4), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpinsrd $3, -24(%rsp,%rdi), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vpinsrd $1, -24(%rsp,%rsi), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpinsrd $2, -24(%rsp,%r8,4), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpinsrd $3, -24(%rsp,%r10), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; INT256-LABEL: pr35820:
|
||||
; INT256: # %bb.0: # %entry
|
||||
; INT256-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0
|
||||
; INT256-NEXT: vpermps %ymm0, %ymm1, %ymm0
|
||||
; INT256-NEXT: retq
|
||||
entry:
|
||||
%tmp1 = extractelement <8 x i32> %indices, i32 0
|
||||
%vecext2.8 = extractelement <4 x i32> %v, i32 %tmp1
|
||||
%tmp2 = extractelement <8 x i32> %indices, i32 1
|
||||
%vecext2.9 = extractelement <4 x i32> %v, i32 %tmp2
|
||||
%tmp3 = extractelement <8 x i32> %indices, i32 2
|
||||
%vecext2.10 = extractelement <4 x i32> %v, i32 %tmp3
|
||||
%tmp4 = extractelement <8 x i32> %indices, i32 3
|
||||
%vecext2.11 = extractelement <4 x i32> %v, i32 %tmp4
|
||||
%tmp5 = extractelement <8 x i32> %indices, i32 4
|
||||
%vecext2.12 = extractelement <4 x i32> %v, i32 %tmp5
|
||||
%tmp6 = extractelement <8 x i32> %indices, i32 5
|
||||
%vecext2.13 = extractelement <4 x i32> %v, i32 %tmp6
|
||||
%tmp7 = extractelement <8 x i32> %indices, i32 6
|
||||
%vecext2.14 = extractelement <4 x i32> %v, i32 %tmp7
|
||||
%tmp8 = extractelement <8 x i32> %indices, i32 7
|
||||
%vecext2.15 = extractelement <4 x i32> %v, i32 %tmp8
|
||||
%tmp9 = insertelement <8 x i32> undef, i32 %vecext2.8, i32 0
|
||||
%tmp10 = insertelement <8 x i32> %tmp9, i32 %vecext2.9, i32 1
|
||||
%tmp11 = insertelement <8 x i32> %tmp10, i32 %vecext2.10, i32 2
|
||||
%tmp12 = insertelement <8 x i32> %tmp11, i32 %vecext2.11, i32 3
|
||||
%tmp13 = insertelement <8 x i32> %tmp12, i32 %vecext2.12, i32 4
|
||||
%tmp14 = insertelement <8 x i32> %tmp13, i32 %vecext2.13, i32 5
|
||||
%tmp15 = insertelement <8 x i32> %tmp14, i32 %vecext2.14, i32 6
|
||||
%tmp16 = insertelement <8 x i32> %tmp15, i32 %vecext2.15, i32 7
|
||||
ret <8 x i32> %tmp16
|
||||
}
|
||||
|
||||
define <8 x float> @pr35820_float(<4 x float> %v, <8 x i32> %indices) unnamed_addr nounwind {
|
||||
; AVX1-LABEL: pr35820_float:
|
||||
; AVX1: # %bb.0: # %entry
|
||||
; AVX1-NEXT: vpextrq $1, %xmm1, %r8
|
||||
; AVX1-NEXT: movq %r8, %r10
|
||||
; AVX1-NEXT: shrq $30, %r10
|
||||
; AVX1-NEXT: vmovq %xmm1, %r9
|
||||
; AVX1-NEXT: movq %r9, %rdx
|
||||
; AVX1-NEXT: shrq $30, %rdx
|
||||
; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; AVX1-NEXT: andl $3, %r9d
|
||||
; AVX1-NEXT: andl $12, %edx
|
||||
; AVX1-NEXT: andl $3, %r8d
|
||||
; AVX1-NEXT: andl $12, %r10d
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
|
||||
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; AVX1-NEXT: movq %rax, %rdi
|
||||
; AVX1-NEXT: shrq $30, %rdi
|
||||
; AVX1-NEXT: vmovq %xmm0, %rcx
|
||||
; AVX1-NEXT: movq %rcx, %rsi
|
||||
; AVX1-NEXT: shrq $30, %rsi
|
||||
; AVX1-NEXT: andl $3, %ecx
|
||||
; AVX1-NEXT: andl $12, %esi
|
||||
; AVX1-NEXT: andl $3, %eax
|
||||
; AVX1-NEXT: andl $12, %edi
|
||||
; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
|
||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
|
||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
|
||||
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
|
||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
|
||||
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; INT256-LABEL: pr35820_float:
|
||||
; INT256: # %bb.0: # %entry
|
||||
; INT256-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0
|
||||
; INT256-NEXT: vpermps %ymm0, %ymm1, %ymm0
|
||||
; INT256-NEXT: retq
|
||||
entry:
|
||||
%tmp1 = extractelement <8 x i32> %indices, i32 0
|
||||
%vecext2.8 = extractelement <4 x float> %v, i32 %tmp1
|
||||
%tmp2 = extractelement <8 x i32> %indices, i32 1
|
||||
%vecext2.9 = extractelement <4 x float> %v, i32 %tmp2
|
||||
%tmp3 = extractelement <8 x i32> %indices, i32 2
|
||||
%vecext2.10 = extractelement <4 x float> %v, i32 %tmp3
|
||||
%tmp4 = extractelement <8 x i32> %indices, i32 3
|
||||
%vecext2.11 = extractelement <4 x float> %v, i32 %tmp4
|
||||
%tmp5 = extractelement <8 x i32> %indices, i32 4
|
||||
%vecext2.12 = extractelement <4 x float> %v, i32 %tmp5
|
||||
%tmp6 = extractelement <8 x i32> %indices, i32 5
|
||||
%vecext2.13 = extractelement <4 x float> %v, i32 %tmp6
|
||||
%tmp7 = extractelement <8 x i32> %indices, i32 6
|
||||
%vecext2.14 = extractelement <4 x float> %v, i32 %tmp7
|
||||
%tmp8 = extractelement <8 x i32> %indices, i32 7
|
||||
%vecext2.15 = extractelement <4 x float> %v, i32 %tmp8
|
||||
%tmp9 = insertelement <8 x float> undef, float %vecext2.8, i32 0
|
||||
%tmp10 = insertelement <8 x float> %tmp9, float %vecext2.9, i32 1
|
||||
%tmp11 = insertelement <8 x float> %tmp10, float %vecext2.10, i32 2
|
||||
%tmp12 = insertelement <8 x float> %tmp11, float %vecext2.11, i32 3
|
||||
%tmp13 = insertelement <8 x float> %tmp12, float %vecext2.12, i32 4
|
||||
%tmp14 = insertelement <8 x float> %tmp13, float %vecext2.13, i32 5
|
||||
%tmp15 = insertelement <8 x float> %tmp14, float %vecext2.14, i32 6
|
||||
%tmp16 = insertelement <8 x float> %tmp15, float %vecext2.15, i32 7
|
||||
ret <8 x float> %tmp16
|
||||
}
|
||||
|
||||
define <4 x i32> @big_source(<8 x i32> %v, <4 x i32> %indices) unnamed_addr nounwind {
|
||||
; AVX-LABEL: big_source:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: pushq %rbp
|
||||
; AVX-NEXT: movq %rsp, %rbp
|
||||
; AVX-NEXT: andq $-32, %rsp
|
||||
; AVX-NEXT: subq $64, %rsp
|
||||
; AVX-NEXT: vmovq %xmm1, %rax
|
||||
; AVX-NEXT: movq %rax, %rcx
|
||||
; AVX-NEXT: shrq $30, %rcx
|
||||
; AVX-NEXT: andl $28, %ecx
|
||||
; AVX-NEXT: vpextrq $1, %xmm1, %rdx
|
||||
; AVX-NEXT: movq %rdx, %rsi
|
||||
; AVX-NEXT: sarq $32, %rsi
|
||||
; AVX-NEXT: andl $7, %eax
|
||||
; AVX-NEXT: andl $7, %edx
|
||||
; AVX-NEXT: vmovaps %ymm0, (%rsp)
|
||||
; AVX-NEXT: andl $7, %esi
|
||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; AVX-NEXT: vpinsrd $1, (%rsp,%rcx), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpinsrd $2, (%rsp,%rdx,4), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpinsrd $3, (%rsp,%rsi,4), %xmm0, %xmm0
|
||||
; AVX-NEXT: movq %rbp, %rsp
|
||||
; AVX-NEXT: popq %rbp
|
||||
; AVX-NEXT: vzeroupper
|
||||
; AVX-NEXT: retq
|
||||
entry:
|
||||
%tmp1 = extractelement <4 x i32> %indices, i32 0
|
||||
%vecext2.8 = extractelement <8 x i32> %v, i32 %tmp1
|
||||
%tmp2 = extractelement <4 x i32> %indices, i32 1
|
||||
%vecext2.9 = extractelement <8 x i32> %v, i32 %tmp2
|
||||
%tmp3 = extractelement <4 x i32> %indices, i32 2
|
||||
%vecext2.10 = extractelement <8 x i32> %v, i32 %tmp3
|
||||
%tmp4 = extractelement <4 x i32> %indices, i32 3
|
||||
%vecext2.11 = extractelement <8 x i32> %v, i32 %tmp4
|
||||
%tmp9 = insertelement <4 x i32> undef, i32 %vecext2.8, i32 0
|
||||
%tmp10 = insertelement <4 x i32> %tmp9, i32 %vecext2.9, i32 1
|
||||
%tmp11 = insertelement <4 x i32> %tmp10, i32 %vecext2.10, i32 2
|
||||
%tmp12 = insertelement <4 x i32> %tmp11, i32 %vecext2.11, i32 3
|
||||
ret <4 x i32> %tmp12
|
||||
}
|
||||
|
@ -135,3 +135,29 @@ Ltmp1:
|
||||
.cv_filechecksums # File index to string table offset subsection
|
||||
.cv_stringtable # String table
|
||||
|
||||
# CHECK-LABEL: FunctionLineTable [
|
||||
# CHECK: LinkageName: ?baz@@YAXXZ
|
||||
# CHECK: Flags: 0x1
|
||||
# CHECK: CodeSize: 0x3D
|
||||
# CHECK: FilenameSegment [
|
||||
# CHECK: Filename: D:\src\llvm\build\t.cpp (0x0)
|
||||
# CHECK: +0x0 [
|
||||
# CHECK: LineNumberStart: 13
|
||||
# CHECK: ]
|
||||
# CHECK: +0x1 [
|
||||
# CHECK: LineNumberStart: 14
|
||||
# CHECK: ]
|
||||
# CHECK: +0x8 [
|
||||
# CHECK: LineNumberStart: 15
|
||||
# CHECK: ]
|
||||
# There shouldn't be any other line number entries because all the other
|
||||
# .cv_locs are on line 15 where the top-level inline call site is.
|
||||
# CHECK-NOT: LineNumberStart
|
||||
# CHECK: +0x34 [
|
||||
# CHECK: LineNumberStart: 16
|
||||
# CHECK: ]
|
||||
# CHECK: +0x3B [
|
||||
# CHECK: LineNumberStart: 17
|
||||
# CHECK: ]
|
||||
# CHECK: ]
|
||||
# CHECK: ]
|
||||
|
@ -99,7 +99,8 @@
|
||||
// CHECK: shll $2, %eax
|
||||
sall $2, %eax
|
||||
|
||||
// CHECK: rep movsb
|
||||
// CHECK: rep
|
||||
// CHECK-NEXT: movsb
|
||||
rep # comment
|
||||
movsb
|
||||
|
||||
@ -1557,3 +1558,38 @@ ptwriteq 0xdeadbeef(%rbx,%rcx,8)
|
||||
// CHECK: ptwriteq %rax
|
||||
// CHECK: encoding: [0xf3,0x48,0x0f,0xae,0xe0]
|
||||
ptwriteq %rax
|
||||
|
||||
// __asm __volatile(
|
||||
// "pushf \n\t"
|
||||
// "popf \n\t"
|
||||
// "rep \n\t"
|
||||
// ".byte 0x0f, 0xa7, 0xd0"
|
||||
// );
|
||||
// CHECK: pushfq
|
||||
// CHECK-NEXT: popfq
|
||||
// CHECK-NEXT: rep
|
||||
// CHECK-NEXT: .byte 15
|
||||
// CHECK-NEXT: .byte 167
|
||||
// CHECK-NEXT: .byte 208
|
||||
pushfq
|
||||
popfq
|
||||
rep
|
||||
.byte 15
|
||||
.byte 167
|
||||
.byte 208
|
||||
|
||||
// CHECK: lock
|
||||
// CHECK: cmpxchgl
|
||||
cmp $0, %edx
|
||||
je 1f
|
||||
lock
|
||||
1: cmpxchgl %ecx,(%rdi)
|
||||
|
||||
// CHECK: rep
|
||||
// CHECK-NEXT: byte
|
||||
rep
|
||||
.byte 0xa4 # movsb
|
||||
|
||||
// CHECK: lock
|
||||
// This line has to be the last one in the file
|
||||
lock
|
||||
|
46
test/ThinLTO/X86/Inputs/dicompositetype-unique2.ll
Normal file
46
test/ThinLTO/X86/Inputs/dicompositetype-unique2.ll
Normal file
@ -0,0 +1,46 @@
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-scei-ps4"
|
||||
|
||||
%struct.CFVS = type { %struct.Vec }
|
||||
%struct.Vec = type { i8 }
|
||||
%struct.S = type { i8 }
|
||||
|
||||
define void @_ZN4CFVSD2Ev(%struct.CFVS* %this) unnamed_addr align 2 !dbg !8 {
|
||||
entry:
|
||||
%this.addr = alloca %struct.CFVS*, align 8
|
||||
store %struct.CFVS* %this, %struct.CFVS** %this.addr, align 8
|
||||
%this1 = load %struct.CFVS*, %struct.CFVS** %this.addr, align 8
|
||||
%m_val = getelementptr inbounds %struct.CFVS, %struct.CFVS* %this1, i32 0, i32 0
|
||||
ret void
|
||||
}
|
||||
|
||||
declare dereferenceable(1) %struct.S* @_Z3Getv()
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!3, !4, !5, !6}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 6.0.0 (trunk 321360) (llvm/trunk 321359)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
|
||||
!1 = !DIFile(filename: "bz188598-b.cpp", directory: "")
|
||||
!2 = !{}
|
||||
!3 = !{i32 2, !"Dwarf Version", i32 4}
|
||||
!4 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!5 = !{i32 1, !"wchar_size", i32 2}
|
||||
!6 = !{i32 7, !"PIC Level", i32 2}
|
||||
!8 = distinct !DISubprogram(name: "~CFVS", linkageName: "_ZN4CFVSD2Ev", scope: !9, file: !1, line: 2, type: !28, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0, declaration: !27, variables: !2)
|
||||
!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "CFVS", file: !10, line: 7, size: 8, elements: !11, identifier: "_ZTS4CFVS")
|
||||
!10 = !DIFile(filename: "./bz188598.h", directory: "")
|
||||
!11 = !{!12, !27}
|
||||
!12 = !DIDerivedType(tag: DW_TAG_member, name: "m_val", scope: !9, file: !10, line: 9, baseType: !13, size: 8)
|
||||
!13 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Vec<&Get>", file: !10, line: 4, size: 8, elements: !14, templateParams: !19, identifier: "_ZTS3VecIXadL_Z3GetvEEE")
|
||||
!14 = !{!35}
|
||||
!19 = !{!20}
|
||||
!20 = !DITemplateValueParameter(name: "F", type: !21, value: %struct.S* ()* @_Z3Getv)
|
||||
!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !22, size: 64)
|
||||
!22 = !DIDerivedType(tag: DW_TAG_typedef, name: "Func", file: !10, line: 2, baseType: !23)
|
||||
!23 = !DISubroutineType(types: !24)
|
||||
!24 = !{!35}
|
||||
!27 = !DISubprogram(name: "~CFVS", scope: !9, file: !10, line: 8, type: !28, isLocal: false, isDefinition: false, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false)
|
||||
!28 = !DISubroutineType(types: !29)
|
||||
!29 = !{null, !30}
|
||||
!30 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer)
|
||||
!35 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
|
69
test/ThinLTO/X86/dicompositetype-unique2.ll
Normal file
69
test/ThinLTO/X86/dicompositetype-unique2.ll
Normal file
@ -0,0 +1,69 @@
|
||||
; RUN: opt -module-summary -o %t1.bc %s
|
||||
; RUN: opt -module-summary -o %t2.bc %S/Inputs/dicompositetype-unique2.ll
|
||||
; RUN: llvm-lto --thinlto-action=run %t1.bc %t2.bc -thinlto-save-temps=%t3.
|
||||
; RUN: llvm-dis %t3.0.3.imported.bc -o - | FileCheck %s
|
||||
; RUN: llvm-lto2 run %t1.bc %t2.bc -o %t --save-temps \
|
||||
; RUN: -r %t1.bc,_ZN1CD2Ev,pl \
|
||||
; RUN: -r %t1.bc,_ZN4CFVSD2Ev,l \
|
||||
; RUN: -r %t1.bc,_Z3Getv,l \
|
||||
; RUN: -r %t2.bc,_ZN4CFVSD2Ev,pl \
|
||||
; RUN: -r %t2.bc,_Z3Getv,l
|
||||
; RUN: llvm-dis %t.1.3.import.bc -o - | FileCheck %s
|
||||
|
||||
; Only llvm-lto2 adds the dso_local keyword, hence the {{.*}}
|
||||
; CHECK: define available_externally{{.*}} void @_ZN4CFVSD2Ev
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-scei-ps4"
|
||||
|
||||
%class.C = type <{ i32 (...)**, %class.A, %struct.CFVS, [6 x i8] }>
|
||||
%class.A = type { %struct.Vec }
|
||||
%struct.Vec = type { i8 }
|
||||
%struct.CFVS = type { %struct.Vec }
|
||||
%struct.S = type { i8 }
|
||||
|
||||
define void @_ZN1CD2Ev(%class.C* %this) unnamed_addr align 2 !dbg !8 {
|
||||
entry:
|
||||
%this.addr = alloca %class.C*, align 8
|
||||
%this1 = load %class.C*, %class.C** %this.addr, align 8
|
||||
%m = getelementptr inbounds %class.C, %class.C* %this1, i32 0, i32 2
|
||||
call void @_ZN4CFVSD2Ev(%struct.CFVS* %m), !dbg !50
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @_ZN4CFVSD2Ev(%struct.CFVS*) unnamed_addr
|
||||
|
||||
declare dereferenceable(1) %struct.S* @_Z3Getv()
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!3, !4, !5, !6}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 6.0.0 (trunk 321360) (llvm/trunk 321359)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
|
||||
!1 = !DIFile(filename: "bz188598-a.cpp", directory: ".")
|
||||
!2 = !{}
|
||||
!3 = !{i32 2, !"Dwarf Version", i32 4}
|
||||
!4 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!5 = !{i32 1, !"wchar_size", i32 2}
|
||||
!6 = !{i32 7, !"PIC Level", i32 2}
|
||||
!8 = distinct !DISubprogram(name: "~C", linkageName: "_ZN1CD2Ev", scope: !9, file: !1, line: 9, type: !47, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0, declaration: !46, variables: !2)
|
||||
!9 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "C", file: !1, line: 5, size: 128, elements: !10, vtableHolder: !9, identifier: "_ZTS1C")
|
||||
!10 = !{!38, !46}
|
||||
!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Vec<&Get>", file: !16, line: 4, size: 8, elements: !17, templateParams: !22, identifier: "_ZTS3VecIXadL_Z3GetvEEE")
|
||||
!16 = !DIFile(filename: "./bz188598.h", directory: ".")
|
||||
!17 = !{!55}
|
||||
!22 = !{!23}
|
||||
!23 = !DITemplateValueParameter(name: "F", type: !24, value: %struct.S* ()* @_Z3Getv)
|
||||
!24 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !25, size: 64)
|
||||
!25 = !DIDerivedType(tag: DW_TAG_typedef, name: "Func", file: !16, line: 2, baseType: !26)
|
||||
!26 = !DISubroutineType(types: !27)
|
||||
!27 = !{!55}
|
||||
!38 = !DIDerivedType(tag: DW_TAG_member, name: "m", scope: !9, file: !1, line: 7, baseType: !39, size: 8, offset: 72)
|
||||
!39 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "CFVS", file: !16, line: 7, size: 8, elements: !40, identifier: "_ZTS4CFVS")
|
||||
!40 = !{!41}
|
||||
!41 = !DIDerivedType(tag: DW_TAG_member, name: "m_val", scope: !39, file: !16, line: 9, baseType: !15, size: 8)
|
||||
!46 = !DISubprogram(name: "~C", scope: !9, file: !1, line: 6, type: !47, isLocal: false, isDefinition: false, scopeLine: 6, containingType: !9, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 0, flags: DIFlagPrototyped, isOptimized: false)
|
||||
!47 = !DISubroutineType(types: !48)
|
||||
!48 = !{!55}
|
||||
!50 = !DILocation(line: 9, scope: !51)
|
||||
!51 = distinct !DILexicalBlock(scope: !8, file: !1, line: 9)
|
||||
!55 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
|
19
test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll
Normal file
19
test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll
Normal file
@ -0,0 +1,19 @@
|
||||
; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-select=true %s | FileCheck %s --check-prefix=CHECK
|
||||
target datalayout =
|
||||
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; Select when both offset and scale reg are present.
|
||||
define i64 @test1(i1 %c, i64* %b, i64 %scale) {
|
||||
; CHECK-LABEL: @test1
|
||||
entry:
|
||||
; CHECK-LABEL: entry:
|
||||
%g = getelementptr inbounds i64, i64* %b, i64 %scale
|
||||
%g1 = getelementptr inbounds i64, i64* %g, i64 8
|
||||
%g2 = getelementptr inbounds i64, i64* %g, i64 16
|
||||
%s = select i1 %c, i64* %g1, i64* %g2
|
||||
; CHECK-NOT: sunkaddr
|
||||
%v = load i64 , i64* %s, align 8
|
||||
ret i64 %v
|
||||
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: opt -S -gvn-hoist < %s | FileCheck %s
|
||||
; CHECK-LABEL: build_tree
|
||||
; CHECK: load
|
||||
; CHECK: load
|
||||
; Check that the load is not hoisted because the call can potentially
|
||||
@ -23,3 +24,47 @@ do.end: ; preds = %do.body
|
||||
}
|
||||
|
||||
declare i1 @pqdownheap(i32)
|
||||
|
||||
@i = external hidden unnamed_addr global i32, align 4
|
||||
@j = external hidden unnamed_addr global [573 x i32], align 4
|
||||
@v = external global i1
|
||||
|
||||
; CHECK-LABEL: test
|
||||
; CHECK-LABEL: do.end
|
||||
; CHECK: load
|
||||
; Check that the load is not hoisted because the call can potentially
|
||||
; modify the global
|
||||
|
||||
define i32 @test() {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond:
|
||||
%a3 = load volatile i1, i1* @v
|
||||
br i1 %a3, label %for.body, label %while.end
|
||||
|
||||
for.body:
|
||||
br label %if.then
|
||||
|
||||
if.then:
|
||||
%tmp4 = load i32, i32* @i, align 4
|
||||
br label %for.cond
|
||||
|
||||
while.end:
|
||||
br label %do.body
|
||||
|
||||
do.body:
|
||||
%tmp9 = load i32, i32* getelementptr inbounds ([573 x i32], [573 x i32]* @j,
|
||||
i32 0, i32 1), align 4
|
||||
%tmp10 = load i32, i32* @i, align 4
|
||||
call void @fn()
|
||||
%a1 = load volatile i1, i1* @v
|
||||
br i1 %a1, label %do.body, label %do.end
|
||||
|
||||
do.end:
|
||||
%tmp20 = load i32, i32* getelementptr inbounds ([573 x i32], [573 x i32]* @j,
|
||||
i32 0, i32 1), align 4
|
||||
ret i32 %tmp20
|
||||
}
|
||||
|
||||
declare void @fn()
|
||||
|
43
test/Transforms/JumpThreading/ddt-crash3.ll
Normal file
43
test/Transforms/JumpThreading/ddt-crash3.ll
Normal file
@ -0,0 +1,43 @@
|
||||
; RUN: opt < %s -jump-threading -disable-output -verify-dom-info
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
@global = external local_unnamed_addr global i64, align 8
|
||||
@global.1 = external local_unnamed_addr global i64, align 8
|
||||
@global.2 = external local_unnamed_addr global i64, align 8
|
||||
|
||||
; Function Attrs: norecurse noreturn nounwind uwtable
|
||||
define void @hoge() local_unnamed_addr #0 {
|
||||
bb:
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb26, %bb
|
||||
%tmp = load i64, i64* @global, align 8, !tbaa !1
|
||||
%tmp2 = icmp eq i64 %tmp, 0
|
||||
br i1 %tmp2, label %bb27, label %bb3
|
||||
|
||||
bb3: ; preds = %bb1
|
||||
%tmp4 = load i64, i64* @global.1, align 8, !tbaa !1
|
||||
%tmp5 = icmp eq i64 %tmp4, 0
|
||||
br i1 %tmp5, label %bb23, label %bb23
|
||||
|
||||
bb23: ; preds = %bb3, %bb3
|
||||
br label %bb26
|
||||
|
||||
bb26: ; preds = %bb27, %bb23
|
||||
br label %bb1
|
||||
|
||||
bb27: ; preds = %bb1
|
||||
br label %bb26
|
||||
}
|
||||
|
||||
attributes #0 = { norecurse noreturn nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
!llvm.ident = !{!0}
|
||||
|
||||
!0 = !{!"clang version 7.0.0 "}
|
||||
!1 = !{!2, !2, i64 0}
|
||||
!2 = !{!"long", !3, i64 0}
|
||||
!3 = !{!"omnipotent char", !4, i64 0}
|
||||
!4 = !{!"Simple C/C++ TBAA"}
|
75
test/Transforms/JumpThreading/ddt-crash4.ll
Normal file
75
test/Transforms/JumpThreading/ddt-crash4.ll
Normal file
@ -0,0 +1,75 @@
|
||||
; RUN: opt < %s -jump-threading -disable-output -verify-dom-info
|
||||
@global = external global i64, align 8
|
||||
|
||||
define void @f() {
|
||||
bb:
|
||||
br label %bb1
|
||||
|
||||
bb1:
|
||||
%tmp = load i64, i64* @global, align 8
|
||||
%tmp2 = icmp eq i64 %tmp, 0
|
||||
br i1 %tmp2, label %bb27, label %bb3
|
||||
|
||||
bb3:
|
||||
%tmp4 = load i64, i64* @global, align 8
|
||||
%tmp5 = icmp eq i64 %tmp4, 0
|
||||
br i1 %tmp5, label %bb6, label %bb7
|
||||
|
||||
bb6:
|
||||
br label %bb7
|
||||
|
||||
bb7:
|
||||
%tmp8 = phi i1 [ true, %bb3 ], [ undef, %bb6 ]
|
||||
%tmp9 = select i1 %tmp8, i64 %tmp4, i64 0
|
||||
br i1 false, label %bb10, label %bb23
|
||||
|
||||
bb10:
|
||||
%tmp11 = load i64, i64* @global, align 8
|
||||
%tmp12 = icmp slt i64 %tmp11, 5
|
||||
br i1 %tmp12, label %bb13, label %bb17
|
||||
|
||||
bb13:
|
||||
br label %bb14
|
||||
|
||||
bb14:
|
||||
br i1 undef, label %bb15, label %bb16
|
||||
|
||||
bb15:
|
||||
unreachable
|
||||
|
||||
bb16:
|
||||
br label %bb10
|
||||
|
||||
bb17:
|
||||
br label %bb18
|
||||
|
||||
bb18:
|
||||
br i1 undef, label %bb22, label %bb13
|
||||
|
||||
bb19:
|
||||
br i1 undef, label %bb20, label %bb21
|
||||
|
||||
bb20:
|
||||
unreachable
|
||||
|
||||
bb21:
|
||||
br label %bb18
|
||||
|
||||
bb22:
|
||||
br label %bb23
|
||||
|
||||
bb23:
|
||||
br i1 undef, label %bb24, label %bb13
|
||||
|
||||
bb24:
|
||||
br i1 undef, label %bb26, label %bb25
|
||||
|
||||
bb25:
|
||||
br label %bb19
|
||||
|
||||
bb26:
|
||||
br label %bb1
|
||||
|
||||
bb27:
|
||||
br label %bb24
|
||||
}
|
53
test/Transforms/LoopVectorize/pr35773.ll
Normal file
53
test/Transforms/LoopVectorize/pr35773.ll
Normal file
@ -0,0 +1,53 @@
|
||||
; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s 2>&1 | FileCheck %s
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
@a = common local_unnamed_addr global i32 0, align 4
|
||||
@b = common local_unnamed_addr global i8 0, align 1
|
||||
|
||||
; Function Attrs: norecurse nounwind uwtable
|
||||
define void @doit1() local_unnamed_addr{
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%main.iv = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
|
||||
%i8.iv = phi i8 [ 0, %entry ], [ %i8.add, %for.body ]
|
||||
%i32.iv = phi i32 [ 0, %entry ], [ %i32.add, %for.body ]
|
||||
|
||||
%trunc.to.be.converted.to.new.iv = trunc i32 %i32.iv to i8
|
||||
%i8.add = add i8 %i8.iv, %trunc.to.be.converted.to.new.iv
|
||||
|
||||
%noop.conv.under.pse = and i32 %i32.iv, 255
|
||||
%i32.add = add nuw nsw i32 %noop.conv.under.pse, 9
|
||||
|
||||
%inc = add i32 %main.iv, 1
|
||||
%tobool = icmp eq i32 %inc, 16
|
||||
br i1 %tobool, label %for.cond.for.end_crit_edge, label %for.body
|
||||
|
||||
; CHECK-LABEL: @doit1(
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[MAIN_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[MAIN_IV_NEXT:%.*]], [[VECTOR_BODY:%.*]] ]
|
||||
; CHECK-NEXT: [[I8_IV:%.*]] = phi <4 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[I8_IV_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[I32_IV:%.*]] = phi <4 x i32> [ <i32 0, i32 9, i32 18, i32 27>, [[VECTOR_PH]] ], [ [[I32_IV_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[IV_FROM_TRUNC:%.*]] = phi <4 x i8> [ <i8 0, i8 9, i8 18, i8 27>, [[VECTOR_PH]] ], [ [[IV_FROM_TRUNC_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[MAIN_IV]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[MAIN_IV]], 0
|
||||
|
||||
; CHECK-NEXT: [[I8_IV_NEXT]] = add <4 x i8> [[I8_IV]], [[IV_FROM_TRUNC]]
|
||||
|
||||
; CHECK-NEXT: [[MAIN_IV_NEXT]] = add i32 [[MAIN_IV]], 4
|
||||
; CHECK-NEXT: [[I32_IV_NEXT]] = add <4 x i32> [[I32_IV]], <i32 36, i32 36, i32 36, i32 36>
|
||||
; CHECK-NEXT: [[IV_FROM_TRUNC_NEXT]] = add <4 x i8> [[IV_FROM_TRUNC]], <i8 36, i8 36, i8 36, i8 36>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[MAIN_IV_NEXT]], 16
|
||||
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
|
||||
|
||||
for.cond.for.end_crit_edge:
|
||||
store i8 %i8.add, i8* @b, align 1
|
||||
br label %for.end
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
74
test/Transforms/SLPVectorizer/X86/PR35628_1.ll
Normal file
74
test/Transforms/SLPVectorizer/X86/PR35628_1.ll
Normal file
@ -0,0 +1,74 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
|
||||
|
||||
define void @mainTest(i32* %ptr) #0 {
|
||||
; CHECK-LABEL: @mainTest(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[PTR:%.*]], null
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP:%.*]], label [[BAIL_OUT:%.*]]
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA5:%.*]], [[LOOP]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 3
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[TMP4]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i32 1, undef
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], undef
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], undef
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], undef
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], undef
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP6]] to i64
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP13]], undef
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP8]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
|
||||
; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP16]], 1
|
||||
; CHECK-NEXT: [[OP_EXTRA3:%.*]] = add i32 [[OP_EXTRA]], [[TMP7]]
|
||||
; CHECK-NEXT: [[OP_EXTRA4:%.*]] = add i32 [[OP_EXTRA3]], [[TMP6]]
|
||||
; CHECK-NEXT: [[OP_EXTRA5]] = add i32 [[OP_EXTRA4]], [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP15]], undef
|
||||
; CHECK-NEXT: br label [[LOOP]]
|
||||
; CHECK: bail_out:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%cmp = icmp eq i32* %ptr, null
|
||||
br i1 %cmp, label %loop, label %bail_out
|
||||
|
||||
loop:
|
||||
%dummy_phi = phi i32 [ 1, %entry ], [ %18, %loop ]
|
||||
%0 = load i32, i32 * %ptr , align 4
|
||||
%1 = mul i32 %0, %0
|
||||
%2 = add i32 1, %1
|
||||
%3 = getelementptr inbounds i32, i32 * %ptr, i64 1
|
||||
%4 = load i32, i32 * %3 , align 4
|
||||
%5 = mul i32 %4, %4
|
||||
%6 = add i32 %2, %4
|
||||
%7 = add i32 %6, %5
|
||||
%8 = getelementptr inbounds i32, i32 *%ptr, i64 2
|
||||
%9 = load i32, i32 * %8 , align 4
|
||||
%10 = mul i32 %9, %9
|
||||
%11 = add i32 %7, %9
|
||||
%12 = add i32 %11, %10
|
||||
%13 = sext i32 %9 to i64
|
||||
%14 = getelementptr inbounds i32, i32 *%ptr, i64 3
|
||||
%15 = load i32, i32 * %14 , align 4
|
||||
%16 = mul i32 %15, %15
|
||||
%17 = add i32 %12, %15
|
||||
%18 = add i32 %17, %16
|
||||
br label %loop
|
||||
|
||||
bail_out:
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "target-cpu"="westmere" }
|
||||
|
64
test/Transforms/SLPVectorizer/X86/PR35628_2.ll
Normal file
64
test/Transforms/SLPVectorizer/X86/PR35628_2.ll
Normal file
@ -0,0 +1,64 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell | FileCheck %s
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
|
||||
|
||||
define void @test() #0 {
|
||||
; CHECK-LABEL: @test(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA3:%.*]], [[LOOP]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP6:%.*]], [[LOOP]] ]
|
||||
; CHECK-NEXT: [[DUMMY_ADD:%.*]] = add i16 0, 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> undef, i64 [[TMP0]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[TMP0]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[TMP0]], i32 2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[TMP0]], i32 3
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> <i64 3, i64 2, i64 1, i64 0>, [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6]] = extractelement <4 x i64> [[TMP5]], i32 3
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP7]], 32
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> <i64 1, i64 1, i64 1, i64 1>, [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = ashr exact <4 x i64> [[TMP8]], <i64 32, i64 32, i64 32, i64 32>
|
||||
; CHECK-NEXT: [[SUM1:%.*]] = add i64 undef, undef
|
||||
; CHECK-NEXT: [[SUM2:%.*]] = add i64 [[SUM1]], undef
|
||||
; CHECK-NEXT: [[ZSUM:%.*]] = add i64 [[SUM2]], 0
|
||||
; CHECK-NEXT: [[JOIN:%.*]] = add i64 undef, [[ZSUM]]
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i64> [[TMP9]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i64> [[BIN_RDX]], [[RDX_SHUF1]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[BIN_RDX2]], i32 0
|
||||
; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i64 [[TMP10]], 0
|
||||
; CHECK-NEXT: [[OP_EXTRA3]] = add i64 [[OP_EXTRA]], [[TMP6]]
|
||||
; CHECK-NEXT: [[LAST:%.*]] = add i64 [[JOIN]], undef
|
||||
; CHECK-NEXT: br label [[LOOP]]
|
||||
;
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%dummy_phi = phi i64 [ 1, %entry ], [ %last, %loop ]
|
||||
%0 = phi i64 [ 2, %entry ], [ %fork, %loop ]
|
||||
%inc1 = add i64 %0, 1
|
||||
%inc2 = add i64 %0, 2
|
||||
%inc11 = add i64 1, %inc1
|
||||
%exact1 = ashr exact i64 %inc11, 32
|
||||
%inc3 = add i64 %0, 3
|
||||
%dummy_add = add i16 0, 0
|
||||
%inc12 = add i64 1, %inc2
|
||||
%exact2 = ashr exact i64 %inc12, 32
|
||||
%dummy_shl = shl i64 %inc3, 32
|
||||
%inc13 = add i64 1, %inc3
|
||||
%exact3 = ashr exact i64 %inc13, 32
|
||||
%fork = add i64 %0, 0
|
||||
%sum1 = add i64 %exact3, %exact2
|
||||
%sum2 = add i64 %sum1, %exact1
|
||||
%zsum = add i64 %sum2, 0
|
||||
%sext22 = add i64 1, %fork
|
||||
%exact4 = ashr exact i64 %sext22, 32
|
||||
%join = add i64 %fork, %zsum
|
||||
%last = add i64 %join, %exact4
|
||||
br label %loop
|
||||
}
|
||||
|
48
test/Transforms/SLPVectorizer/X86/PR35777.ll
Normal file
48
test/Transforms/SLPVectorizer/X86/PR35777.ll
Normal file
@ -0,0 +1,48 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -verify -slp-vectorizer -o - -S -mtriple=x86_64-apple-macosx10.13.0 | FileCheck %s
|
||||
|
||||
@global = local_unnamed_addr global [6 x double] zeroinitializer, align 16
|
||||
|
||||
define { i64, i64 } @patatino(double %arg) {
|
||||
; CHECK-LABEL: @patatino(
|
||||
; CHECK-NEXT: bb:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* bitcast ([6 x double]* @global to <2 x double>*), align 16
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2) to <2 x double>*), align 16
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[ARG:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[ARG]], i32 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP0]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4) to <2 x double>*), align 16
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = fptosi <2 x double> [[TMP7]] to <2 x i32>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = trunc <2 x i64> [[TMP9]] to <2 x i32>
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 0
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP12]], 0
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i64, i64 } [[TMP16]], i64 [[TMP14]], 1
|
||||
; CHECK-NEXT: ret { i64, i64 } [[TMP17]]
|
||||
;
|
||||
bb:
|
||||
%tmp = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 0), align 16
|
||||
%tmp1 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2), align 16
|
||||
%tmp2 = fmul double %tmp1, %arg
|
||||
%tmp3 = fadd double %tmp, %tmp2
|
||||
%tmp4 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4), align 16
|
||||
%tmp5 = fadd double %tmp4, %tmp3
|
||||
%tmp6 = fptosi double %tmp5 to i32
|
||||
%tmp7 = sext i32 %tmp6 to i64
|
||||
%tmp8 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 1), align 8
|
||||
%tmp9 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 3), align 8
|
||||
%tmp10 = fmul double %tmp9, %arg
|
||||
%tmp11 = fadd double %tmp8, %tmp10
|
||||
%tmp12 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 5), align 8
|
||||
%tmp13 = fadd double %tmp12, %tmp11
|
||||
%tmp14 = fptosi double %tmp13 to i32
|
||||
%tmp15 = sext i32 %tmp14 to i64
|
||||
%tmp16 = insertvalue { i64, i64 } undef, i64 %tmp7, 0
|
||||
%tmp17 = insertvalue { i64, i64 } %tmp16, i64 %tmp15, 1
|
||||
ret { i64, i64 } %tmp17
|
||||
}
|
27
test/Transforms/SLPVectorizer/X86/PR35865.ll
Normal file
27
test/Transforms/SLPVectorizer/X86/PR35865.ll
Normal file
@ -0,0 +1,27 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -slp-vectorizer < %s -S -o - -mtriple=x86_64-apple-macosx10.10.0 -mcpu=core2 | FileCheck %s
|
||||
|
||||
define void @_Z10fooConvertPDv4_xS0_S0_PKS_() {
|
||||
; CHECK-LABEL: @_Z10fooConvertPDv4_xS0_S0_PKS_(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x half> undef, i32 4
|
||||
; CHECK-NEXT: [[CONV_I_4_I:%.*]] = fpext half [[TMP0]] to float
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[CONV_I_4_I]] to i32
|
||||
; CHECK-NEXT: [[VECINS_I_4_I:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i32 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x half> undef, i32 5
|
||||
; CHECK-NEXT: [[CONV_I_5_I:%.*]] = fpext half [[TMP2]] to float
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[CONV_I_5_I]] to i32
|
||||
; CHECK-NEXT: [[VECINS_I_5_I:%.*]] = insertelement <8 x i32> [[VECINS_I_4_I]], i32 [[TMP3]], i32 5
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%0 = extractelement <16 x half> undef, i32 4
|
||||
%conv.i.4.i = fpext half %0 to float
|
||||
%1 = bitcast float %conv.i.4.i to i32
|
||||
%vecins.i.4.i = insertelement <8 x i32> undef, i32 %1, i32 4
|
||||
%2 = extractelement <16 x half> undef, i32 5
|
||||
%conv.i.5.i = fpext half %2 to float
|
||||
%3 = bitcast float %conv.i.5.i to i32
|
||||
%vecins.i.5.i = insertelement <8 x i32> %vecins.i.4.i, i32 %3, i32 5
|
||||
ret void
|
||||
}
|
@ -7,8 +7,8 @@ target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
|
||||
; CHECK-LABEL: @simple_select(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
|
||||
@ -20,8 +20,8 @@ define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c)
|
||||
; CHECK-NEXT: ret <4 x float> [[RD]]
|
||||
;
|
||||
; ZEROTHRESH-LABEL: @simple_select(
|
||||
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
|
||||
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
|
||||
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
|
||||
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
|
||||
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
|
||||
@ -64,18 +64,18 @@ declare void @llvm.assume(i1) nounwind
|
||||
; This entire tree is ephemeral, don't vectorize any of it.
|
||||
define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
|
||||
; CHECK-LABEL: @simple_select_eph(
|
||||
; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
|
||||
; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
|
||||
; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
|
||||
; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
|
||||
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
|
||||
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
|
||||
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2
|
||||
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3
|
||||
; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
|
||||
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
|
||||
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2
|
||||
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3
|
||||
; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
|
||||
; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
|
||||
; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
|
||||
; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
|
||||
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
|
||||
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
|
||||
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
|
||||
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
|
||||
; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
|
||||
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
|
||||
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
|
||||
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
|
||||
; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
|
||||
; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
|
||||
@ -100,18 +100,18 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32>
|
||||
; CHECK-NEXT: ret <4 x float> undef
|
||||
;
|
||||
; ZEROTHRESH-LABEL: @simple_select_eph(
|
||||
; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
|
||||
; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
|
||||
; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
|
||||
; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
|
||||
; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
|
||||
; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
|
||||
; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2
|
||||
; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3
|
||||
; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
|
||||
; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
|
||||
; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2
|
||||
; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3
|
||||
; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
|
||||
; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
|
||||
; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
|
||||
; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
|
||||
; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
|
||||
; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
|
||||
; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
|
||||
; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
|
||||
; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
|
||||
; ZEROTHRESH-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
|
||||
; ZEROTHRESH-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
|
||||
; ZEROTHRESH-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
|
||||
@ -175,8 +175,8 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32>
|
||||
; doesn't matter
|
||||
define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
|
||||
; CHECK-LABEL: @simple_select_insert_out_of_order(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
|
||||
@ -188,8 +188,8 @@ define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float
|
||||
; CHECK-NEXT: ret <4 x float> [[RD]]
|
||||
;
|
||||
; ZEROTHRESH-LABEL: @simple_select_insert_out_of_order(
|
||||
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
|
||||
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
|
||||
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
|
||||
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
|
||||
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2
|
||||
; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
|
||||
@ -233,8 +233,8 @@ declare void @f32_user(float) #0
|
||||
; Multiple users of the final constructed vector
|
||||
define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
|
||||
; CHECK-LABEL: @simple_select_users(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
|
||||
@ -247,8 +247,8 @@ define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32
|
||||
; CHECK-NEXT: ret <4 x float> [[RD]]
|
||||
;
|
||||
; ZEROTHRESH-LABEL: @simple_select_users(
|
||||
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
|
||||
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
|
||||
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
|
||||
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
|
||||
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
|
||||
@ -291,18 +291,18 @@ define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32
|
||||
; Unused insertelement
|
||||
define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
|
||||
; CHECK-LABEL: @simple_select_no_users(
|
||||
; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
|
||||
; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
|
||||
; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
|
||||
; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
|
||||
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
|
||||
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
|
||||
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2
|
||||
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3
|
||||
; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
|
||||
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
|
||||
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2
|
||||
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3
|
||||
; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
|
||||
; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
|
||||
; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
|
||||
; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
|
||||
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
|
||||
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
|
||||
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
|
||||
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
|
||||
; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
|
||||
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
|
||||
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
|
||||
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
|
||||
@ -330,18 +330,18 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x
|
||||
; CHECK-NEXT: ret <4 x float> [[RD]]
|
||||
;
|
||||
; ZEROTHRESH-LABEL: @simple_select_no_users(
|
||||
; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
|
||||
; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
|
||||
; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
|
||||
; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
|
||||
; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
|
||||
; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
|
||||
; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2
|
||||
; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3
|
||||
; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
|
||||
; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
|
||||
; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2
|
||||
; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3
|
||||
; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
|
||||
; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
|
||||
; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
|
||||
; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
|
||||
; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
|
||||
; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
|
||||
; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
|
||||
; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
|
||||
; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
|
||||
; ZEROTHRESH-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
|
||||
; ZEROTHRESH-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
|
||||
; ZEROTHRESH-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
|
||||
@ -387,25 +387,25 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x
|
||||
; to do this backwards this backwards
|
||||
define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
|
||||
; CHECK-LABEL: @reconstruct(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> %c, i32 0
|
||||
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> %c, i32 1
|
||||
; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP2]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 2
|
||||
; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP3]], i32 2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> %c, i32 3
|
||||
; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP4]], i32 3
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[C]], i32 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[C]], i32 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[C]], i32 0
|
||||
; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
|
||||
; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP3]], i32 1
|
||||
; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP2]], i32 2
|
||||
; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP1]], i32 3
|
||||
; CHECK-NEXT: ret <4 x i32> [[RD]]
|
||||
;
|
||||
; ZEROTHRESH-LABEL: @reconstruct(
|
||||
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> %c, i32 0
|
||||
; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> %c, i32 1
|
||||
; ZEROTHRESH-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP2]], i32 1
|
||||
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 2
|
||||
; ZEROTHRESH-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP3]], i32 2
|
||||
; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> %c, i32 3
|
||||
; ZEROTHRESH-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP4]], i32 3
|
||||
; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
|
||||
; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
|
||||
; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
|
||||
; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[C0]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[C1]], i32 1
|
||||
; ZEROTHRESH-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[C2]], i32 2
|
||||
; ZEROTHRESH-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[C3]], i32 3
|
||||
; ZEROTHRESH-NEXT: ret <4 x i32> [[RD]]
|
||||
;
|
||||
%c0 = extractelement <4 x i32> %c, i32 0
|
||||
@ -421,8 +421,8 @@ define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
|
||||
|
||||
define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %c) #0 {
|
||||
; CHECK-LABEL: @simple_select_v2(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> %c, zeroinitializer
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x float> %a, <2 x float> %b
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[C:%.*]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: [[RA:%.*]] = insertelement <2 x float> undef, float [[TMP3]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
|
||||
@ -430,12 +430,12 @@ define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %
|
||||
; CHECK-NEXT: ret <2 x float> [[RB]]
|
||||
;
|
||||
; ZEROTHRESH-LABEL: @simple_select_v2(
|
||||
; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <2 x i32> %c, i32 0
|
||||
; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <2 x i32> %c, i32 1
|
||||
; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <2 x float> %a, i32 0
|
||||
; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <2 x float> %a, i32 1
|
||||
; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <2 x float> %b, i32 0
|
||||
; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <2 x float> %b, i32 1
|
||||
; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <2 x i32> [[C:%.*]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <2 x i32> [[C]], i32 1
|
||||
; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <2 x float> [[A:%.*]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <2 x float> [[A]], i32 1
|
||||
; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <2 x float> [[B:%.*]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <2 x float> [[B]], i32 1
|
||||
; ZEROTHRESH-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
|
||||
; ZEROTHRESH-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
|
||||
; ZEROTHRESH-NEXT: [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]]
|
||||
@ -464,12 +464,12 @@ define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %
|
||||
; (low cost threshold needed to force this to happen)
|
||||
define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
|
||||
; CHECK-LABEL: @simple_select_partial_vector(
|
||||
; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
|
||||
; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
|
||||
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
|
||||
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
|
||||
; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
|
||||
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
|
||||
; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
|
||||
; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
|
||||
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
|
||||
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
|
||||
; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
|
||||
; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
|
||||
@ -485,12 +485,12 @@ define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b,
|
||||
; CHECK-NEXT: ret <4 x float> [[RB]]
|
||||
;
|
||||
; ZEROTHRESH-LABEL: @simple_select_partial_vector(
|
||||
; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
|
||||
; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
|
||||
; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
|
||||
; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
|
||||
; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
|
||||
; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
|
||||
; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
|
||||
; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
|
||||
; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
|
||||
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
|
||||
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
|
||||
@ -530,7 +530,7 @@ define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b,
|
||||
; must be rescheduled. The case here is from compiling Julia.
|
||||
define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-LABEL: @reschedule_extract(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
|
||||
@ -542,7 +542,7 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-NEXT: ret <4 x float> [[V3]]
|
||||
;
|
||||
; ZEROTHRESH-LABEL: @reschedule_extract(
|
||||
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b
|
||||
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
|
||||
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
|
||||
@ -576,7 +576,7 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
|
||||
; instructions that are erased.
|
||||
define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-LABEL: @take_credit(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
|
||||
@ -588,7 +588,7 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-NEXT: ret <4 x float> [[V3]]
|
||||
;
|
||||
; ZEROTHRESH-LABEL: @take_credit(
|
||||
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b
|
||||
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
|
||||
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
|
||||
@ -622,10 +622,10 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
|
||||
define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
|
||||
; CHECK-LABEL: @multi_tree(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double [[W:%.*]], i32 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[X:%.*]], i32 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Z:%.*]], i32 3
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0
|
||||
@ -640,10 +640,10 @@ define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
|
||||
;
|
||||
; ZEROTHRESH-LABEL: @multi_tree(
|
||||
; ZEROTHRESH-NEXT: entry:
|
||||
; ZEROTHRESH-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0
|
||||
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1
|
||||
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2
|
||||
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3
|
||||
; ZEROTHRESH-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double [[W:%.*]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[X:%.*]], i32 1
|
||||
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 2
|
||||
; ZEROTHRESH-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Z:%.*]], i32 3
|
||||
; ZEROTHRESH-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>
|
||||
; ZEROTHRESH-NEXT: [[TMP5:%.*]] = fmul <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[TMP4]]
|
||||
; ZEROTHRESH-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0
|
||||
@ -675,7 +675,7 @@ entry:
|
||||
define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: @_vadd256(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x float> %a, %b
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0
|
||||
; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1
|
||||
@ -696,7 +696,7 @@ define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr
|
||||
;
|
||||
; ZEROTHRESH-LABEL: @_vadd256(
|
||||
; ZEROTHRESH-NEXT: entry:
|
||||
; ZEROTHRESH-NEXT: [[TMP0:%.*]] = fadd <8 x float> %a, %b
|
||||
; ZEROTHRESH-NEXT: [[TMP0:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
|
||||
; ZEROTHRESH-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0
|
||||
; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1
|
||||
|
@ -1,11 +1,30 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: julia_2xdouble
|
||||
; CHECK: load <2 x double>
|
||||
; CHECK: load <2 x double>
|
||||
; CHECK: fmul <2 x double>
|
||||
; CHECK: fadd <2 x double>
|
||||
define void @julia_2xdouble([2 x double]* sret, [2 x double]*, [2 x double]*, [2 x double]*) {
|
||||
; CHECK-LABEL: @julia_2xdouble(
|
||||
; CHECK-NEXT: top:
|
||||
; CHECK-NEXT: [[PX0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP2:%.*]], i64 0, i64 0
|
||||
; CHECK-NEXT: [[PY0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP3:%.*]], i64 0, i64 0
|
||||
; CHECK-NEXT: [[PX1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP2]], i64 0, i64 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[PX0]] to <2 x double>*
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 4
|
||||
; CHECK-NEXT: [[PY1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP3]], i64 0, i64 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[PY0]] to <2 x double>*
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP5]], [[TMP7]]
|
||||
; CHECK-NEXT: [[PZ0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP1:%.*]], i64 0, i64 0
|
||||
; CHECK-NEXT: [[PZ1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP1]], i64 0, i64 1
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[PZ0]] to <2 x double>*
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[TMP9]], align 4
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP8]], [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
|
||||
; CHECK-NEXT: [[I0:%.*]] = insertvalue [2 x double] undef, double [[TMP12]], 0
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x double> [[TMP11]], i32 1
|
||||
; CHECK-NEXT: [[I1:%.*]] = insertvalue [2 x double] [[I0]], double [[TMP13]], 1
|
||||
; CHECK-NEXT: store [2 x double] [[I1]], [2 x double]* [[TMP0:%.*]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
top:
|
||||
%px0 = getelementptr inbounds [2 x double], [2 x double]* %2, i64 0, i64 0
|
||||
%x0 = load double, double* %px0, align 4
|
||||
@ -29,12 +48,40 @@ top:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: julia_4xfloat
|
||||
; CHECK: load <4 x float>
|
||||
; CHECK: load <4 x float>
|
||||
; CHECK: fmul <4 x float>
|
||||
; CHECK: fadd <4 x float>
|
||||
define void @julia_4xfloat([4 x float]* sret, [4 x float]*, [4 x float]*, [4 x float]*) {
|
||||
; CHECK-LABEL: @julia_4xfloat(
|
||||
; CHECK-NEXT: top:
|
||||
; CHECK-NEXT: [[PX0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2:%.*]], i64 0, i64 0
|
||||
; CHECK-NEXT: [[PY0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3:%.*]], i64 0, i64 0
|
||||
; CHECK-NEXT: [[PX1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 1
|
||||
; CHECK-NEXT: [[PY1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 1
|
||||
; CHECK-NEXT: [[PX2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 2
|
||||
; CHECK-NEXT: [[PY2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 2
|
||||
; CHECK-NEXT: [[PX3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 3
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[PX0]] to <4 x float>*
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4
|
||||
; CHECK-NEXT: [[PY3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[PY0]] to <4 x float>*
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[TMP5]], [[TMP7]]
|
||||
; CHECK-NEXT: [[PZ0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1:%.*]], i64 0, i64 0
|
||||
; CHECK-NEXT: [[PZ1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 1
|
||||
; CHECK-NEXT: [[PZ2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 2
|
||||
; CHECK-NEXT: [[PZ3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 3
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[PZ0]] to <4 x float>*
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x float> [[TMP8]], [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP11]], i32 0
|
||||
; CHECK-NEXT: [[I0:%.*]] = insertvalue [4 x float] undef, float [[TMP12]], 0
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP11]], i32 1
|
||||
; CHECK-NEXT: [[I1:%.*]] = insertvalue [4 x float] [[I0]], float [[TMP13]], 1
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[TMP11]], i32 2
|
||||
; CHECK-NEXT: [[I2:%.*]] = insertvalue [4 x float] [[I1]], float [[TMP14]], 2
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i32 3
|
||||
; CHECK-NEXT: [[I3:%.*]] = insertvalue [4 x float] [[I2]], float [[TMP15]], 3
|
||||
; CHECK-NEXT: store [4 x float] [[I3]], [4 x float]* [[TMP0:%.*]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
top:
|
||||
%px0 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0
|
||||
%x0 = load float, float* %px0, align 4
|
||||
@ -76,9 +123,27 @@ top:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: julia_load_array_of_float
|
||||
; CHECK: fsub <4 x float>
|
||||
define void @julia_load_array_of_float([4 x float]* %a, [4 x float]* %b, [4 x float]* %c) {
|
||||
; CHECK-LABEL: @julia_load_array_of_float(
|
||||
; CHECK-NEXT: top:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x float]* [[A:%.*]] to <4 x float>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x float], [4 x float]* [[A]], align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast [4 x float]* [[B:%.*]] to <4 x float>*
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
|
||||
; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x float], [4 x float]* [[B]], align 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
|
||||
; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x float] undef, float [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
|
||||
; CHECK-NEXT: [[C_ARR1:%.*]] = insertvalue [4 x float] [[C_ARR0]], float [[TMP6]], 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
|
||||
; CHECK-NEXT: [[C_ARR2:%.*]] = insertvalue [4 x float] [[C_ARR1]], float [[TMP7]], 2
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
|
||||
; CHECK-NEXT: [[C_ARR3:%.*]] = insertvalue [4 x float] [[C_ARR2]], float [[TMP8]], 3
|
||||
; CHECK-NEXT: store [4 x float] [[C_ARR3]], [4 x float]* [[C:%.*]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
top:
|
||||
%a_arr = load [4 x float], [4 x float]* %a, align 4
|
||||
%a0 = extractvalue [4 x float] %a_arr, 0
|
||||
@ -102,11 +167,27 @@ top:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: julia_load_array_of_i32
|
||||
; CHECK: load <4 x i32>
|
||||
; CHECK: load <4 x i32>
|
||||
; CHECK: sub <4 x i32>
|
||||
define void @julia_load_array_of_i32([4 x i32]* %a, [4 x i32]* %b, [4 x i32]* %c) {
|
||||
; CHECK-LABEL: @julia_load_array_of_i32(
|
||||
; CHECK-NEXT: top:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x i32]* [[A:%.*]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x i32], [4 x i32]* [[A]], align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast [4 x i32]* [[B:%.*]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
|
||||
; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x i32], [4 x i32]* [[B]], align 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
|
||||
; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x i32] undef, i32 [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
|
||||
; CHECK-NEXT: [[C_ARR1:%.*]] = insertvalue [4 x i32] [[C_ARR0]], i32 [[TMP6]], 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
|
||||
; CHECK-NEXT: [[C_ARR2:%.*]] = insertvalue [4 x i32] [[C_ARR1]], i32 [[TMP7]], 2
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
|
||||
; CHECK-NEXT: [[C_ARR3:%.*]] = insertvalue [4 x i32] [[C_ARR2]], i32 [[TMP8]], 3
|
||||
; CHECK-NEXT: store [4 x i32] [[C_ARR3]], [4 x i32]* [[C:%.*]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
top:
|
||||
%a_arr = load [4 x i32], [4 x i32]* %a, align 4
|
||||
%a0 = extractvalue [4 x i32] %a_arr, 0
|
||||
@ -132,9 +213,30 @@ top:
|
||||
|
||||
; Almost identical to previous test, but for type that should NOT be vectorized.
|
||||
;
|
||||
; CHECK-LABEL: julia_load_array_of_i16
|
||||
; CHECK-NOT: i2>
|
||||
define void @julia_load_array_of_i16([4 x i16]* %a, [4 x i16]* %b, [4 x i16]* %c) {
|
||||
; CHECK-LABEL: @julia_load_array_of_i16(
|
||||
; CHECK-NEXT: top:
|
||||
; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x i16], [4 x i16]* [[A:%.*]], align 4
|
||||
; CHECK-NEXT: [[A0:%.*]] = extractvalue [4 x i16] [[A_ARR]], 0
|
||||
; CHECK-NEXT: [[A2:%.*]] = extractvalue [4 x i16] [[A_ARR]], 2
|
||||
; CHECK-NEXT: [[A1:%.*]] = extractvalue [4 x i16] [[A_ARR]], 1
|
||||
; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x i16], [4 x i16]* [[B:%.*]], align 4
|
||||
; CHECK-NEXT: [[B0:%.*]] = extractvalue [4 x i16] [[B_ARR]], 0
|
||||
; CHECK-NEXT: [[B2:%.*]] = extractvalue [4 x i16] [[B_ARR]], 2
|
||||
; CHECK-NEXT: [[B1:%.*]] = extractvalue [4 x i16] [[B_ARR]], 1
|
||||
; CHECK-NEXT: [[A3:%.*]] = extractvalue [4 x i16] [[A_ARR]], 3
|
||||
; CHECK-NEXT: [[C1:%.*]] = sub i16 [[A1]], [[B1]]
|
||||
; CHECK-NEXT: [[B3:%.*]] = extractvalue [4 x i16] [[B_ARR]], 3
|
||||
; CHECK-NEXT: [[C0:%.*]] = sub i16 [[A0]], [[B0]]
|
||||
; CHECK-NEXT: [[C2:%.*]] = sub i16 [[A2]], [[B2]]
|
||||
; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x i16] undef, i16 [[C0]], 0
|
||||
; CHECK-NEXT: [[C_ARR1:%.*]] = insertvalue [4 x i16] [[C_ARR0]], i16 [[C1]], 1
|
||||
; CHECK-NEXT: [[C3:%.*]] = sub i16 [[A3]], [[B3]]
|
||||
; CHECK-NEXT: [[C_ARR2:%.*]] = insertvalue [4 x i16] [[C_ARR1]], i16 [[C2]], 2
|
||||
; CHECK-NEXT: [[C_ARR3:%.*]] = insertvalue [4 x i16] [[C_ARR2]], i16 [[C3]], 3
|
||||
; CHECK-NEXT: store [4 x i16] [[C_ARR3]], [4 x i16]* [[C:%.*]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
top:
|
||||
%a_arr = load [4 x i16], [4 x i16]* %a, align 4
|
||||
%a0 = extractvalue [4 x i16] %a_arr, 0
|
||||
@ -160,11 +262,27 @@ top:
|
||||
|
||||
%pseudovec = type { float, float, float, float }
|
||||
|
||||
; CHECK-LABEL: julia_load_struct_of_float
|
||||
; CHECK: load <4 x float>
|
||||
; CHECK: load <4 x float>
|
||||
; CHECK: fsub <4 x float>
|
||||
define void @julia_load_struct_of_float(%pseudovec* %a, %pseudovec* %b, %pseudovec* %c) {
|
||||
; CHECK-LABEL: @julia_load_struct_of_float(
|
||||
; CHECK-NEXT: top:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast %pseudovec* [[A:%.*]] to <4 x float>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
|
||||
; CHECK-NEXT: [[A_STRUCT:%.*]] = load [[PSEUDOVEC:%.*]], %pseudovec* [[A]], align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast %pseudovec* [[B:%.*]] to <4 x float>*
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
|
||||
; CHECK-NEXT: [[B_STRUCT:%.*]] = load [[PSEUDOVEC]], %pseudovec* [[B]], align 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
|
||||
; CHECK-NEXT: [[C_STRUCT0:%.*]] = insertvalue [[PSEUDOVEC]] undef, float [[TMP5]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
|
||||
; CHECK-NEXT: [[C_STRUCT1:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct0, float [[TMP6]], 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
|
||||
; CHECK-NEXT: [[C_STRUCT2:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct1, float [[TMP7]], 2
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
|
||||
; CHECK-NEXT: [[C_STRUCT3:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct2, float [[TMP8]], 3
|
||||
; CHECK-NEXT: store [[PSEUDOVEC]] %c_struct3, %pseudovec* [[C:%.*]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
top:
|
||||
%a_struct = load %pseudovec, %pseudovec* %a, align 4
|
||||
%a0 = extractvalue %pseudovec %a_struct, 0
|
||||
|
@ -1,15 +1,46 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -slp-vectorizer < %s -S -mtriple="x86_64-grtev3-linux-gnu" -mcpu=corei7-avx | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-grtev3-linux-gnu"
|
||||
|
||||
; We used to crash on this example because we were building a constant
|
||||
; expression during vectorization and the vectorizer expects instructions
|
||||
; as elements of the vectorized tree.
|
||||
; CHECK-LABEL: @test
|
||||
; PR19621
|
||||
|
||||
define void @test() {
|
||||
; CHECK-LABEL: @test(
|
||||
; CHECK-NEXT: bb279:
|
||||
; CHECK-NEXT: br label [[BB283:%.*]]
|
||||
; CHECK: bb283:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ [[TMP13:%.*]], [[EXIT]] ]
|
||||
; CHECK-NEXT: br label [[BB284:%.*]]
|
||||
; CHECK: bb284:
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double>
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef
|
||||
; CHECK-NEXT: br label [[BB21_I:%.*]]
|
||||
; CHECK: bb21.i:
|
||||
; CHECK-NEXT: br i1 undef, label [[BB22_I:%.*]], label [[EXIT]]
|
||||
; CHECK: bb22.i:
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]]
|
||||
; CHECK-NEXT: br label [[BB32_I:%.*]]
|
||||
; CHECK: bb32.i:
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x double> [ [[TMP5]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ]
|
||||
; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> <double undef, double 0.000000e+00>, [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> undef, [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float>
|
||||
; CHECK-NEXT: [[TMP317:%.*]] = fptrunc double undef to float
|
||||
; CHECK-NEXT: [[TMP319:%.*]] = fptrunc double undef to float
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP317]], i32 0
|
||||
; CHECK-NEXT: [[TMP13]] = insertelement <2 x float> [[TMP12]], float [[TMP319]], i32 1
|
||||
; CHECK-NEXT: br label [[BB283]]
|
||||
;
|
||||
bb279:
|
||||
br label %bb283
|
||||
|
||||
@ -62,6 +93,12 @@ exit:
|
||||
; vectorizer starts at the type (%t2, %t3) and will constant fold the tree.
|
||||
; The code that handles insertelement instructions must handle this.
|
||||
define <4 x double> @constant_folding() {
|
||||
; CHECK-LABEL: @constant_folding(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double 1.000000e+00, i32 1
|
||||
; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x double> [[I1]], double 2.000000e+00, i32 0
|
||||
; CHECK-NEXT: ret <4 x double> [[I2]]
|
||||
;
|
||||
entry:
|
||||
%t0 = fadd double 1.000000e+00 , 0.000000e+00
|
||||
%t1 = fadd double 1.000000e+00 , 1.000000e+00
|
||||
@ -71,10 +108,3 @@ entry:
|
||||
%i2 = insertelement <4 x double> %i1, double %t3, i32 0
|
||||
ret <4 x double> %i2
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @constant_folding
|
||||
; CHECK: %[[V0:.+]] = extractelement <2 x double> <double 1.000000e+00, double 2.000000e+00>, i32 0
|
||||
; CHECK: %[[V1:.+]] = insertelement <4 x double> undef, double %[[V0]], i32 1
|
||||
; CHECK: %[[V2:.+]] = extractelement <2 x double> <double 1.000000e+00, double 2.000000e+00>, i32 1
|
||||
; CHECK: %[[V3:.+]] = insertelement <4 x double> %[[V1]], double %[[V2]], i32 0
|
||||
; CHECK: ret <4 x double> %[[V3]]
|
||||
|
@ -0,0 +1,77 @@
|
||||
; XFAIL: *
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -structurizecfg -verify-region-info %s
|
||||
|
||||
; FIXME: Merge into backedge-id-bug
|
||||
; Variant which has an issue with region construction
|
||||
|
||||
define amdgpu_kernel void @loop_backedge_misidentified_alt(i32 addrspace(1)* %arg0) #0 {
|
||||
entry:
|
||||
%tmp = load volatile <2 x i32>, <2 x i32> addrspace(1)* undef, align 16
|
||||
%load1 = load volatile <2 x float>, <2 x float> addrspace(1)* undef
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i32 %tid
|
||||
%i.initial = load volatile i32, i32 addrspace(1)* %gep, align 4
|
||||
br label %LOOP.HEADER
|
||||
|
||||
LOOP.HEADER:
|
||||
%i = phi i32 [ %i.final, %END_ELSE_BLOCK ], [ %i.initial, %entry ]
|
||||
call void asm sideeffect "s_nop 0x100b ; loop $0 ", "r,~{memory}"(i32 %i) #0
|
||||
%tmp12 = zext i32 %i to i64
|
||||
%tmp13 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 %tmp12
|
||||
%tmp14 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp13, align 16
|
||||
%tmp15 = extractelement <4 x i32> %tmp14, i64 0
|
||||
%tmp16 = and i32 %tmp15, 65535
|
||||
%tmp17 = icmp eq i32 %tmp16, 1
|
||||
br i1 %tmp17, label %bb18, label %bb62
|
||||
|
||||
bb18:
|
||||
%tmp19 = extractelement <2 x i32> %tmp, i64 0
|
||||
%tmp22 = lshr i32 %tmp19, 16
|
||||
%tmp24 = urem i32 %tmp22, 52
|
||||
%tmp25 = mul nuw nsw i32 %tmp24, 52
|
||||
br label %INNER_LOOP
|
||||
|
||||
INNER_LOOP:
|
||||
%inner.loop.j = phi i32 [ %tmp25, %bb18 ], [ %inner.loop.j.inc, %INNER_LOOP ]
|
||||
call void asm sideeffect "; inner loop body", ""() #0
|
||||
%inner.loop.j.inc = add nsw i32 %inner.loop.j, 1
|
||||
%inner.loop.cmp = icmp eq i32 %inner.loop.j, 0
|
||||
br i1 %inner.loop.cmp, label %INNER_LOOP_BREAK, label %INNER_LOOP
|
||||
|
||||
INNER_LOOP_BREAK:
|
||||
%tmp59 = extractelement <4 x i32> %tmp14, i64 2
|
||||
call void asm sideeffect "s_nop 23 ", "~{memory}"() #0
|
||||
br label %END_ELSE_BLOCK
|
||||
|
||||
bb62:
|
||||
%load13 = icmp ult i32 %tmp16, 271
|
||||
;br i1 %load13, label %bb64, label %INCREMENT_I
|
||||
; branching directly to the return avoids the bug
|
||||
br i1 %load13, label %RETURN, label %INCREMENT_I
|
||||
|
||||
|
||||
bb64:
|
||||
call void asm sideeffect "s_nop 42", "~{memory}"() #0
|
||||
br label %RETURN
|
||||
|
||||
INCREMENT_I:
|
||||
%inc.i = add i32 %i, 1
|
||||
call void asm sideeffect "s_nop 0x1336 ; increment $0", "v,~{memory}"(i32 %inc.i) #0
|
||||
br label %END_ELSE_BLOCK
|
||||
|
||||
END_ELSE_BLOCK:
|
||||
%i.final = phi i32 [ %tmp59, %INNER_LOOP_BREAK ], [ %inc.i, %INCREMENT_I ]
|
||||
call void asm sideeffect "s_nop 0x1337 ; end else block $0", "v,~{memory}"(i32 %i.final) #0
|
||||
%cmp.end.else.block = icmp eq i32 %i.final, -1
|
||||
br i1 %cmp.end.else.block, label %RETURN, label %LOOP.HEADER
|
||||
|
||||
RETURN:
|
||||
call void asm sideeffect "s_nop 0x99 ; ClosureEval return", "~{memory}"() #0
|
||||
store volatile <2 x float> %load1, <2 x float> addrspace(1)* undef, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
attributes #0 = { convergent nounwind }
|
||||
attributes #1 = { convergent nounwind readnone }
|
163
test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll
Normal file
163
test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll
Normal file
@ -0,0 +1,163 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -structurizecfg %s | FileCheck %s
|
||||
|
||||
; StructurizeCFG::orderNodes used an arbitrary and nonsensical sorting
|
||||
; function which broke the basic backedge identification algorithm. It
|
||||
; would use RPO order, but then do a weird partial sort by the loop
|
||||
; depth assuming blocks are sorted by loop. However a block can appear
|
||||
; in between blocks of a loop that is not part of a loop, breaking the
|
||||
; assumption of the sort.
|
||||
;
|
||||
; The collectInfos must be done in RPO order. The actual
|
||||
; structurization order I think is less important, but unless the loop
|
||||
; headers are identified in RPO order, it finds the wrong set of back
|
||||
; edges.
|
||||
|
||||
define amdgpu_kernel void @loop_backedge_misidentified(i32 addrspace(1)* %arg0) #0 {
|
||||
; CHECK-LABEL: @loop_backedge_misidentified(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[TMP:%.*]] = load volatile <2 x i32>, <2 x i32> addrspace(1)* undef, align 16
|
||||
; CHECK-NEXT: [[LOAD1:%.*]] = load volatile <2 x float>, <2 x float> addrspace(1)* undef
|
||||
; CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG0:%.*]], i32 [[TID]]
|
||||
; CHECK-NEXT: [[I_INITIAL:%.*]] = load volatile i32, i32 addrspace(1)* [[GEP]], align 4
|
||||
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
|
||||
; CHECK: LOOP.HEADER:
|
||||
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[FLOW4:%.*]] ]
|
||||
; CHECK-NEXT: call void asm sideeffect "s_nop 0x100b
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[I]] to i64
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 [[TMP12]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP13]], align 16
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP14]], i64 0
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 65535
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TMP17]], true
|
||||
; CHECK-NEXT: br i1 [[TMP0]], label [[BB62:%.*]], label [[FLOW:%.*]]
|
||||
; CHECK: Flow2:
|
||||
; CHECK-NEXT: br label [[FLOW]]
|
||||
; CHECK: bb18:
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP]], i64 0
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = lshr i32 [[TMP19]], 16
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = urem i32 [[TMP22]], 52
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = mul nuw nsw i32 [[TMP24]], 52
|
||||
; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
|
||||
; CHECK: Flow3:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP7:%.*]], [[FLOW]] ]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP8:%.*]], [[FLOW]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP2]], label [[END_ELSE_BLOCK:%.*]], label [[FLOW4]]
|
||||
; CHECK: INNER_LOOP:
|
||||
; CHECK-NEXT: [[INNER_LOOP_J:%.*]] = phi i32 [ [[INNER_LOOP_J_INC:%.*]], [[INNER_LOOP]] ], [ [[TMP25]], [[BB18:%.*]] ]
|
||||
; CHECK-NEXT: call void asm sideeffect "
|
||||
; CHECK-NEXT: [[INNER_LOOP_J_INC]] = add nsw i32 [[INNER_LOOP_J]], 1
|
||||
; CHECK-NEXT: [[INNER_LOOP_CMP:%.*]] = icmp eq i32 [[INNER_LOOP_J]], 0
|
||||
; CHECK-NEXT: br i1 [[INNER_LOOP_CMP]], label [[INNER_LOOP_BREAK]], label [[INNER_LOOP]]
|
||||
; CHECK: INNER_LOOP_BREAK:
|
||||
; CHECK-NEXT: [[TMP59]] = extractelement <4 x i32> [[TMP14]], i64 2
|
||||
; CHECK-NEXT: call void asm sideeffect "s_nop 23 ", "~{memory}"() #0
|
||||
; CHECK-NEXT: br label [[FLOW3:%.*]]
|
||||
; CHECK: bb62:
|
||||
; CHECK-NEXT: [[LOAD13:%.*]] = icmp ult i32 [[TMP16]], 271
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[LOAD13]], true
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[INCREMENT_I:%.*]], label [[FLOW1:%.*]]
|
||||
; CHECK: Flow1:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[INC_I:%.*]], [[INCREMENT_I]] ], [ undef, [[BB62]] ]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ true, [[INCREMENT_I]] ], [ false, [[BB62]] ]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ false, [[INCREMENT_I]] ], [ true, [[BB62]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP6]], label [[BB64:%.*]], label [[FLOW2:%.*]]
|
||||
; CHECK: bb64:
|
||||
; CHECK-NEXT: call void asm sideeffect "s_nop 42", "~{memory}"() #0
|
||||
; CHECK-NEXT: br label [[FLOW2]]
|
||||
; CHECK: Flow:
|
||||
; CHECK-NEXT: [[TMP7]] = phi i32 [ [[TMP4]], [[FLOW2]] ], [ undef, [[LOOP_HEADER]] ]
|
||||
; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP5]], [[FLOW2]] ], [ false, [[LOOP_HEADER]] ]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ false, [[FLOW2]] ], [ true, [[LOOP_HEADER]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP9]], label [[BB18]], label [[FLOW3]]
|
||||
; CHECK: INCREMENT_I:
|
||||
; CHECK-NEXT: [[INC_I]] = add i32 [[I]], 1
|
||||
; CHECK-NEXT: call void asm sideeffect "s_nop 0x1336
|
||||
; CHECK-NEXT: br label [[FLOW1]]
|
||||
; CHECK: END_ELSE_BLOCK:
|
||||
; CHECK-NEXT: [[I_FINAL:%.*]] = phi i32 [ [[TMP1]], [[FLOW3]] ]
|
||||
; CHECK-NEXT: call void asm sideeffect "s_nop 0x1337
|
||||
; CHECK-NEXT: [[CMP_END_ELSE_BLOCK:%.*]] = icmp eq i32 [[I_FINAL]], -1
|
||||
; CHECK-NEXT: br label [[FLOW4]]
|
||||
; CHECK: Flow4:
|
||||
; CHECK-NEXT: [[TMP10]] = phi i32 [ [[I_FINAL]], [[END_ELSE_BLOCK]] ], [ undef, [[FLOW3]] ]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW3]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP11]], label [[RETURN:%.*]], label [[LOOP_HEADER]]
|
||||
; CHECK: RETURN:
|
||||
; CHECK-NEXT: call void asm sideeffect "s_nop 0x99
|
||||
; CHECK-NEXT: store volatile <2 x float> [[LOAD1]], <2 x float> addrspace(1)* undef, align 8
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%tmp = load volatile <2 x i32>, <2 x i32> addrspace(1)* undef, align 16
|
||||
%load1 = load volatile <2 x float>, <2 x float> addrspace(1)* undef
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i32 %tid
|
||||
%i.initial = load volatile i32, i32 addrspace(1)* %gep, align 4
|
||||
br label %LOOP.HEADER
|
||||
|
||||
LOOP.HEADER:
|
||||
%i = phi i32 [ %i.final, %END_ELSE_BLOCK ], [ %i.initial, %entry ]
|
||||
call void asm sideeffect "s_nop 0x100b ; loop $0 ", "r,~{memory}"(i32 %i) #0
|
||||
%tmp12 = zext i32 %i to i64
|
||||
%tmp13 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 %tmp12
|
||||
%tmp14 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp13, align 16
|
||||
%tmp15 = extractelement <4 x i32> %tmp14, i64 0
|
||||
%tmp16 = and i32 %tmp15, 65535
|
||||
%tmp17 = icmp eq i32 %tmp16, 1
|
||||
br i1 %tmp17, label %bb18, label %bb62
|
||||
|
||||
bb18:
|
||||
%tmp19 = extractelement <2 x i32> %tmp, i64 0
|
||||
%tmp22 = lshr i32 %tmp19, 16
|
||||
%tmp24 = urem i32 %tmp22, 52
|
||||
%tmp25 = mul nuw nsw i32 %tmp24, 52
|
||||
br label %INNER_LOOP
|
||||
|
||||
INNER_LOOP:
|
||||
%inner.loop.j = phi i32 [ %tmp25, %bb18 ], [ %inner.loop.j.inc, %INNER_LOOP ]
|
||||
call void asm sideeffect "; inner loop body", ""() #0
|
||||
%inner.loop.j.inc = add nsw i32 %inner.loop.j, 1
|
||||
%inner.loop.cmp = icmp eq i32 %inner.loop.j, 0
|
||||
br i1 %inner.loop.cmp, label %INNER_LOOP_BREAK, label %INNER_LOOP
|
||||
|
||||
INNER_LOOP_BREAK:
|
||||
%tmp59 = extractelement <4 x i32> %tmp14, i64 2
|
||||
call void asm sideeffect "s_nop 23 ", "~{memory}"() #0
|
||||
br label %END_ELSE_BLOCK
|
||||
|
||||
bb62:
|
||||
%load13 = icmp ult i32 %tmp16, 271
|
||||
br i1 %load13, label %bb64, label %INCREMENT_I
|
||||
|
||||
bb64:
|
||||
call void asm sideeffect "s_nop 42", "~{memory}"() #0
|
||||
br label %RETURN
|
||||
|
||||
INCREMENT_I:
|
||||
%inc.i = add i32 %i, 1
|
||||
call void asm sideeffect "s_nop 0x1336 ; increment $0", "v,~{memory}"(i32 %inc.i) #0
|
||||
br label %END_ELSE_BLOCK
|
||||
|
||||
END_ELSE_BLOCK:
|
||||
%i.final = phi i32 [ %tmp59, %INNER_LOOP_BREAK ], [ %inc.i, %INCREMENT_I ]
|
||||
call void asm sideeffect "s_nop 0x1337 ; end else block $0", "v,~{memory}"(i32 %i.final) #0
|
||||
%cmp.end.else.block = icmp eq i32 %i.final, -1
|
||||
br i1 %cmp.end.else.block, label %RETURN, label %LOOP.HEADER
|
||||
|
||||
RETURN:
|
||||
call void asm sideeffect "s_nop 0x99 ; ClosureEval return", "~{memory}"() #0
|
||||
store volatile <2 x float> %load1, <2 x float> addrspace(1)* undef, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; The same function, except break to return block goes directly to the
|
||||
; return, which managed to hide the bug.
|
||||
; FIXME: Merge variant from backedge-id-bug-xfail
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
attributes #0 = { convergent nounwind }
|
||||
attributes #1 = { convergent nounwind readnone }
|
2
test/Transforms/StructurizeCFG/AMDGPU/lit.local.cfg
Normal file
2
test/Transforms/StructurizeCFG/AMDGPU/lit.local.cfg
Normal file
@ -0,0 +1,2 @@
|
||||
if not 'AMDGPU' in config.root.targets:
|
||||
config.unsupported = True
|
@ -1,32 +1,76 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
|
||||
|
||||
define void @main(float addrspace(1)* %out) {
|
||||
|
||||
; CHECK: main_body:
|
||||
; CHECK: br label %LOOP.outer
|
||||
; CHECK-LABEL: @main(
|
||||
; CHECK-NEXT: main_body:
|
||||
; CHECK-NEXT: br label [[LOOP_OUTER:%.*]]
|
||||
; CHECK: LOOP.outer:
|
||||
; CHECK-NEXT: [[TEMP8_0_PH:%.*]] = phi float [ 0.000000e+00, [[MAIN_BODY:%.*]] ], [ [[TMP13:%.*]], [[FLOW3:%.*]] ]
|
||||
; CHECK-NEXT: [[TEMP4_0_PH:%.*]] = phi i32 [ 0, [[MAIN_BODY]] ], [ [[TMP12:%.*]], [[FLOW3]] ]
|
||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||
; CHECK: LOOP:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ undef, [[LOOP_OUTER]] ], [ [[TMP12]], [[FLOW:%.*]] ]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = phi float [ undef, [[LOOP_OUTER]] ], [ [[TMP13]], [[FLOW]] ]
|
||||
; CHECK-NEXT: [[TEMP4_0:%.*]] = phi i32 [ [[TEMP4_0_PH]], [[LOOP_OUTER]] ], [ [[TMP15:%.*]], [[FLOW]] ]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TEMP4_0]], 1
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], 3
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[TMP22]], true
|
||||
; CHECK-NEXT: br i1 [[TMP2]], label [[ENDIF:%.*]], label [[FLOW]]
|
||||
; CHECK: Flow2:
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = phi float [ [[TEMP8_0_PH]], [[IF29:%.*]] ], [ [[TMP9:%.*]], [[FLOW1:%.*]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP20]], [[IF29]] ], [ undef, [[FLOW1]] ]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[TMP32:%.*]], [[IF29]] ], [ true, [[FLOW1]] ]
|
||||
; CHECK-NEXT: br label [[FLOW]]
|
||||
; CHECK: Flow3:
|
||||
; CHECK-NEXT: br i1 [[TMP16:%.*]], label [[ENDLOOP:%.*]], label [[LOOP_OUTER]]
|
||||
; CHECK: ENDLOOP:
|
||||
; CHECK-NEXT: [[TEMP8_1:%.*]] = phi float [ [[TMP14:%.*]], [[FLOW3]] ]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[TMP20]], 3
|
||||
; CHECK-NEXT: [[DOT45:%.*]] = select i1 [[TMP23]], float 0.000000e+00, float 1.000000e+00
|
||||
; CHECK-NEXT: store float [[DOT45]], float addrspace(1)* [[OUT:%.*]]
|
||||
; CHECK-NEXT: ret void
|
||||
; CHECK: ENDIF:
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP20]], 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[TMP31]], true
|
||||
; CHECK-NEXT: br i1 [[TMP6]], label [[ENDIF28:%.*]], label [[FLOW1]]
|
||||
; CHECK: Flow1:
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP20]], [[ENDIF28]] ], [ [[TMP0]], [[ENDIF]] ]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = phi float [ [[TMP35:%.*]], [[ENDIF28]] ], [ [[TMP1]], [[ENDIF]] ]
|
||||
; CHECK-NEXT: [[TMP9]] = phi float [ [[TMP35]], [[ENDIF28]] ], [ [[TEMP8_0_PH]], [[ENDIF]] ]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ [[TMP36:%.*]], [[ENDIF28]] ], [ true, [[ENDIF]] ]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ false, [[ENDIF28]] ], [ true, [[ENDIF]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP11]], label [[IF29]], label [[FLOW2:%.*]]
|
||||
; CHECK: IF29:
|
||||
; CHECK-NEXT: [[TMP32]] = icmp sgt i32 [[TMP20]], 2
|
||||
; CHECK-NEXT: br label [[FLOW2]]
|
||||
; CHECK: Flow:
|
||||
; CHECK-NEXT: [[TMP12]] = phi i32 [ [[TMP7]], [[FLOW2]] ], [ [[TMP0]], [[LOOP]] ]
|
||||
; CHECK-NEXT: [[TMP13]] = phi float [ [[TMP8]], [[FLOW2]] ], [ [[TMP1]], [[LOOP]] ]
|
||||
; CHECK-NEXT: [[TMP14]] = phi float [ [[TMP3]], [[FLOW2]] ], [ [[TEMP8_0_PH]], [[LOOP]] ]
|
||||
; CHECK-NEXT: [[TMP15]] = phi i32 [ [[TMP4]], [[FLOW2]] ], [ undef, [[LOOP]] ]
|
||||
; CHECK-NEXT: [[TMP16]] = phi i1 [ [[TMP10]], [[FLOW2]] ], [ true, [[LOOP]] ]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = phi i1 [ [[TMP5]], [[FLOW2]] ], [ true, [[LOOP]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP17]], label [[FLOW3]], label [[LOOP]]
|
||||
; CHECK: ENDIF28:
|
||||
; CHECK-NEXT: [[TMP35]] = fadd float [[TEMP8_0_PH]], 1.000000e+00
|
||||
; CHECK-NEXT: [[TMP36]] = icmp sgt i32 [[TMP20]], 2
|
||||
; CHECK-NEXT: br label [[FLOW1]]
|
||||
;
|
||||
main_body:
|
||||
br label %LOOP.outer
|
||||
|
||||
; CHECK: LOOP.outer:
|
||||
; CHECK: br label %LOOP
|
||||
LOOP.outer: ; preds = %ENDIF28, %main_body
|
||||
%temp8.0.ph = phi float [ 0.000000e+00, %main_body ], [ %tmp35, %ENDIF28 ]
|
||||
%temp4.0.ph = phi i32 [ 0, %main_body ], [ %tmp20, %ENDIF28 ]
|
||||
br label %LOOP
|
||||
|
||||
; CHECK: LOOP:
|
||||
; br i1 %{{[0-9]+}}, label %ENDIF, label %Flow
|
||||
LOOP: ; preds = %IF29, %LOOP.outer
|
||||
%temp4.0 = phi i32 [ %temp4.0.ph, %LOOP.outer ], [ %tmp20, %IF29 ]
|
||||
%tmp20 = add i32 %temp4.0, 1
|
||||
%tmp22 = icmp sgt i32 %tmp20, 3
|
||||
br i1 %tmp22, label %ENDLOOP, label %ENDIF
|
||||
|
||||
; CHECK: Flow3
|
||||
; CHECK: br i1 %{{[0-9]+}}, label %ENDLOOP, label %LOOP.outer
|
||||
|
||||
; CHECK: ENDLOOP:
|
||||
; CHECK: ret void
|
||||
ENDLOOP: ; preds = %ENDIF28, %IF29, %LOOP
|
||||
%temp8.1 = phi float [ %temp8.0.ph, %LOOP ], [ %temp8.0.ph, %IF29 ], [ %tmp35, %ENDIF28 ]
|
||||
%tmp23 = icmp eq i32 %tmp20, 3
|
||||
@ -34,29 +78,14 @@ ENDLOOP: ; preds = %ENDIF28, %IF29, %LO
|
||||
store float %.45, float addrspace(1)* %out
|
||||
ret void
|
||||
|
||||
; CHECK: ENDIF:
|
||||
; CHECK: br i1 %tmp31, label %IF29, label %Flow1
|
||||
ENDIF: ; preds = %LOOP
|
||||
%tmp31 = icmp sgt i32 %tmp20, 1
|
||||
br i1 %tmp31, label %IF29, label %ENDIF28
|
||||
|
||||
; CHECK: Flow:
|
||||
; CHECK: br i1 %{{[0-9]+}}, label %Flow2, label %LOOP
|
||||
|
||||
; CHECK: IF29:
|
||||
; CHECK: br label %Flow1
|
||||
IF29: ; preds = %ENDIF
|
||||
%tmp32 = icmp sgt i32 %tmp20, 2
|
||||
br i1 %tmp32, label %ENDLOOP, label %LOOP
|
||||
|
||||
; CHECK: Flow1:
|
||||
; CHECK: br label %Flow
|
||||
|
||||
; CHECK: Flow2:
|
||||
; CHECK: br i1 %{{[0-9]+}}, label %ENDIF28, label %Flow3
|
||||
|
||||
; CHECK: ENDIF28:
|
||||
; CHECK: br label %Flow3
|
||||
ENDIF28: ; preds = %ENDIF
|
||||
%tmp35 = fadd float %temp8.0.ph, 1.0
|
||||
%tmp36 = icmp sgt i32 %tmp20, 2
|
||||
|
26
test/tools/llvm-readobj/macho-needed-libs.test
Normal file
26
test/tools/llvm-readobj/macho-needed-libs.test
Normal file
@ -0,0 +1,26 @@
|
||||
# RUN: yaml2obj %s -o %t.o
|
||||
# RUN: llvm-readobj -needed-libs %t.o | FileCheck %s
|
||||
|
||||
# CHECK: NeededLibraries [
|
||||
# CHECK-NEXT: /usr/lib/libSystem.B.dylib
|
||||
# CHECK-NEXT: ]
|
||||
|
||||
!mach-o
|
||||
FileHeader:
|
||||
magic: 0xFEEDFACF
|
||||
cputype: 0x01000007
|
||||
cpusubtype: 0x00000003
|
||||
filetype: 0x00000001
|
||||
ncmds: 1
|
||||
sizeofcmds: 56
|
||||
flags: 0x00002000
|
||||
reserved: 0x00000000
|
||||
LoadCommands:
|
||||
- cmd: LC_LOAD_DYLIB
|
||||
cmdsize: 56
|
||||
dylib:
|
||||
name: 24
|
||||
timestamp: 2
|
||||
current_version: 81985536
|
||||
compatibility_version: 65536
|
||||
PayloadString: /usr/lib/libSystem.B.dylib
|
@ -39,6 +39,8 @@ public:
|
||||
void printUnwindInfo() override;
|
||||
void printStackMap() const override;
|
||||
|
||||
void printNeededLibraries() override;
|
||||
|
||||
// MachO-specific.
|
||||
void printMachODataInCode() override;
|
||||
void printMachOVersionMin() override;
|
||||
@ -675,6 +677,34 @@ void MachODumper::printStackMap() const {
|
||||
StackMapV2Parser<support::big>(StackMapContentsArray));
|
||||
}
|
||||
|
||||
// Prints the install names referenced by the dylib-style load commands of the
// Mach-O object inside a "NeededLibraries" list scope.
//
// The names are collected from the load commands, sorted, and then written
// one per line.
void MachODumper::printNeededLibraries() {
|
||||
ListScope D(W, "NeededLibraries");
|
||||
|
||||
using LibsTy = std::vector<StringRef>;
|
||||
LibsTy Libs;
|
||||
|
||||
// Walk every load command and keep only the dylib-related kinds listed below.
// NOTE(review): LC_ID_DYLIB is accepted here as well; presumably that command
// names the image itself rather than a dependency -- confirm this is intended.
for (const auto &Command : Obj->load_commands()) {
|
||||
if (Command.C.cmd == MachO::LC_LOAD_DYLIB ||
|
||||
Command.C.cmd == MachO::LC_ID_DYLIB ||
|
||||
Command.C.cmd == MachO::LC_LOAD_WEAK_DYLIB ||
|
||||
Command.C.cmd == MachO::LC_REEXPORT_DYLIB ||
|
||||
Command.C.cmd == MachO::LC_LAZY_LOAD_DYLIB ||
|
||||
Command.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB) {
|
||||
MachO::dylib_command Dl = Obj->getDylibIDLoadCommand(Command);
|
||||
// dylib.name is used as a byte offset from the start of the command; it is
// only dereferenced when that offset is smaller than the command size.
if (Dl.dylib.name < Dl.cmdsize) {
|
||||
// NOTE(review): only the start offset is bounds-checked; the string is
// presumably NUL-terminated within the command -- verify for malformed input.
auto *P = static_cast<const char*>(Command.Ptr) + Dl.dylib.name;
|
||||
Libs.push_back(P);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sort the collected names before printing.
std::stable_sort(Libs.begin(), Libs.end());
|
||||
|
||||
// The names are written directly to outs(), not through the printer W.
for (const auto &L : Libs) {
|
||||
outs() << " " << L << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
void MachODumper::printMachODataInCode() {
|
||||
for (const auto &Load : Obj->load_commands()) {
|
||||
if (Load.C.cmd == MachO::LC_DATA_IN_CODE) {
|
||||
|
@ -258,3 +258,98 @@ TEST(DominatorTreeBatchUpdates, InsertDeleteExhaustive) {
|
||||
EXPECT_TRUE(PDT.verify());
|
||||
}
|
||||
}
|
||||
|
||||
// These are some odd flowgraphs, usually generated from csmith cases,
|
||||
// which are difficult on post dom trees.
|
||||
// Batch-updates both trees on a graph in which every listed block has at
// least one outgoing arc, then checks that each tree still verifies.
TEST(DominatorTreeBatchUpdates, InfiniteLoop) {
  std::vector<CFGBuilder::Arc> InitialArcs = {
      {"1", "2"},
      {"2", "3"},
      {"3", "6"}, {"3", "5"},
      {"4", "5"},
      {"5", "2"},
      {"6", "3"}, {"6", "4"}};

  // SplitBlock on 3 -> 5: route the edge through a new block N.
  std::vector<CFGBuilder::Update> SplitUpdates = {
      {CFGInsert, {"N", "5"}}, {CFGInsert, {"3", "N"}}, {CFGDelete, {"3", "5"}}};

  CFGHolder Holder;
  CFGBuilder Builder(Holder.F, InitialArcs, SplitUpdates);

  // Both trees must verify on the CFG before any update is applied.
  DominatorTree DT(*Holder.F);
  EXPECT_TRUE(DT.verify());
  PostDomTree PDT(*Holder.F);
  EXPECT_TRUE(PDT.verify());

  // Keep calling applyUpdate() until it returns false.
  while (Builder.applyUpdate()) {
  }

  // Hand the same update list to each tree as one batch and re-verify.
  auto TreeUpdates = ToDomUpdates(Builder, SplitUpdates);
  DT.applyUpdates(TreeUpdates);
  EXPECT_TRUE(DT.verify());
  PDT.applyUpdates(TreeUpdates);
  EXPECT_TRUE(PDT.verify());
}
|
||||
|
||||
// Batch-updates both trees on a graph containing blocks 5 and 6, where no
// listed arc leads into 5 and 6 is entered only from 5.
TEST(DominatorTreeBatchUpdates, DeadBlocks) {
  std::vector<CFGBuilder::Arc> InitialArcs = {
      {"1", "2"},
      {"2", "3"},
      {"3", "4"}, {"3", "7"},
      {"4", "4"},
      {"5", "6"}, {"5", "7"},
      {"6", "7"},
      {"7", "2"}, {"7", "8"}};

  // Delete every edge leaving blocks 5 and 6,
  // plus SplitBlock on 7 -> 8 through a new block N.
  std::vector<CFGBuilder::Update> PendingUpdates = {
      {CFGDelete, {"6", "7"}}, {CFGDelete, {"5", "7"}}, {CFGDelete, {"5", "6"}},
      {CFGInsert, {"N", "8"}}, {CFGInsert, {"7", "N"}}, {CFGDelete, {"7", "8"}}};

  CFGHolder Holder;
  CFGBuilder Builder(Holder.F, InitialArcs, PendingUpdates);

  // Both trees must verify on the CFG before any update is applied.
  DominatorTree DT(*Holder.F);
  EXPECT_TRUE(DT.verify());
  PostDomTree PDT(*Holder.F);
  EXPECT_TRUE(PDT.verify());

  // Keep calling applyUpdate() until it returns false.
  while (Builder.applyUpdate()) {
  }

  // Hand the same update list to each tree as one batch and re-verify.
  auto TreeUpdates = ToDomUpdates(Builder, PendingUpdates);
  DT.applyUpdates(TreeUpdates);
  EXPECT_TRUE(DT.verify());
  PDT.applyUpdates(TreeUpdates);
  EXPECT_TRUE(PDT.verify());
}
|
||||
|
||||
// Second graph in which every listed block has an outgoing arc; the 4 -> 6
// edge is split and both trees are batch-updated and re-verified.
TEST(DominatorTreeBatchUpdates, InfiniteLoop2) {
  std::vector<CFGBuilder::Arc> InitialArcs = {
      {"1", "2"},
      {"2", "6"}, {"2", "3"},
      {"3", "4"},
      {"4", "5"}, {"4", "6"},
      {"5", "4"},
      {"6", "2"}};

  // SplitBlock on 4 -> 6: route the edge through a new block N.
  std::vector<CFGBuilder::Update> SplitUpdates = {
      {CFGInsert, {"N", "6"}}, {CFGInsert, {"4", "N"}}, {CFGDelete, {"4", "6"}}};

  CFGHolder Holder;
  CFGBuilder Builder(Holder.F, InitialArcs, SplitUpdates);

  // Both trees must verify on the CFG before any update is applied.
  DominatorTree DT(*Holder.F);
  EXPECT_TRUE(DT.verify());
  PostDomTree PDT(*Holder.F);
  EXPECT_TRUE(PDT.verify());

  // Keep calling applyUpdate() until it returns false.
  while (Builder.applyUpdate()) {
  }

  // Hand the same update list to each tree as one batch and re-verify.
  auto TreeUpdates = ToDomUpdates(Builder, SplitUpdates);
  DT.applyUpdates(TreeUpdates);
  EXPECT_TRUE(DT.verify());
  PDT.applyUpdates(TreeUpdates);
  EXPECT_TRUE(PDT.verify());
}
|
||||
|
@ -925,3 +925,28 @@ TEST(DominatorTree, InsertDeleteExhaustive) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Inserts the single edge 7 -> 23 into the graph below and checks that the
// dominator tree verifies both before and after the insertion.
TEST(DominatorTree, InsertIntoIrreducible) {
  std::vector<CFGBuilder::Arc> InitialArcs = {
      {"0", "1"},
      {"1", "27"}, {"1", "7"},
      {"10", "18"},
      {"13", "10"},
      {"18", "13"}, {"18", "23"},
      {"23", "13"}, {"23", "24"},
      {"24", "1"}, {"24", "18"},
      {"27", "24"}};

  CFGHolder Holder;
  CFGBuilder Builder(Holder.F, InitialArcs, {{Insert, {"7", "23"}}});

  // The tree is built, and must verify, before the update is applied.
  DominatorTree DT(*Holder.F);
  EXPECT_TRUE(DT.verify());

  // Apply the one queued update through the builder, then report the same
  // edge to the tree.
  Builder.applyUpdate();
  BasicBlock *Source = Builder.getOrAddBlock("7");
  BasicBlock *Target = Builder.getOrAddBlock("23");
  DT.insertEdge(Source, Target);

  EXPECT_TRUE(DT.verify());
}
|
||||
|
||||
|
@ -33,6 +33,7 @@ do_asserts="no"
|
||||
do_compare="yes"
|
||||
do_rt="yes"
|
||||
do_libs="yes"
|
||||
do_libcxxabi="yes"
|
||||
do_libunwind="yes"
|
||||
do_test_suite="yes"
|
||||
do_openmp="yes"
|
||||
@ -62,6 +63,7 @@ function usage() {
|
||||
echo " For example -svn-path trunk or -svn-path branches/release_37"
|
||||
echo " -no-rt Disable check-out & build Compiler-RT"
|
||||
echo " -no-libs Disable check-out & build libcxx/libcxxabi/libunwind"
|
||||
echo " -no-libcxxabi Disable check-out & build libcxxabi"
|
||||
echo " -no-libunwind Disable check-out & build libunwind"
|
||||
echo " -no-test-suite Disable check-out & build test-suite"
|
||||
echo " -no-openmp Disable check-out & build libomp"
|
||||
@ -135,6 +137,9 @@ while [ $# -gt 0 ]; do
|
||||
-no-libs )
|
||||
do_libs="no"
|
||||
;;
|
||||
-no-libcxxabi )
|
||||
do_libcxxabi="no"
|
||||
;;
|
||||
-no-libunwind )
|
||||
do_libunwind="no"
|
||||
;;
|
||||
@ -206,7 +211,10 @@ if [ $do_rt = "yes" ]; then
|
||||
projects="$projects compiler-rt"
|
||||
fi
|
||||
if [ $do_libs = "yes" ]; then
|
||||
projects="$projects libcxx libcxxabi"
|
||||
projects="$projects libcxx"
|
||||
if [ $do_libcxxabi = "yes" ]; then
|
||||
projects="$projects libcxxabi"
|
||||
fi
|
||||
if [ $do_libunwind = "yes" ]; then
|
||||
projects="$projects libunwind"
|
||||
fi
|
||||
|
Loading…
x
Reference in New Issue
Block a user