Update llvm to release_39 branch r278877.

Dimitry Andric 2016-08-17 19:41:29 +00:00
commit 6c4bc1bd27
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/projects/clang390-import/; revision=304310
86 changed files with 1190 additions and 424 deletions

View File

@ -61,8 +61,6 @@ licenses, and/or restrictions:
Program Directory
------- ---------
Autoconf llvm/autoconf
llvm/projects/ModuleMaker/autoconf
Google Test llvm/utils/unittest/googletest
OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex}
pyyaml tests llvm/test/YAMLParser/{*.data, LICENSE.TXT}

View File

@ -2014,6 +2014,9 @@ void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA);
void LLVMAddAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
LLVMAttributeRef A);
unsigned LLVMGetAttributeCountAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx);
void LLVMGetAttributesAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
LLVMAttributeRef *Attrs);
LLVMAttributeRef LLVMGetEnumAttributeAtIndex(LLVMValueRef F,
LLVMAttributeIndex Idx,
unsigned KindID);
@ -2600,6 +2603,9 @@ void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef A);
unsigned LLVMGetCallSiteAttributeCount(LLVMValueRef C, LLVMAttributeIndex Idx);
void LLVMGetCallSiteAttributes(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef *Attrs);
LLVMAttributeRef LLVMGetCallSiteEnumAttribute(LLVMValueRef C,
LLVMAttributeIndex Idx,
unsigned KindID);
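The count/get pairs above follow the usual two-call C API pattern: query the count, allocate a buffer, then fetch. A minimal sketch, assuming an existing LLVMValueRef Fn for a function (LLVMAttributeFunctionIndex selects the function itself rather than a parameter or the return value):

#include "llvm-c/Core.h"
#include <vector>

static std::vector<LLVMAttributeRef> getFunctionAttrs(LLVMValueRef Fn) {
  unsigned Count =
      LLVMGetAttributeCountAtIndex(Fn, LLVMAttributeFunctionIndex);
  std::vector<LLVMAttributeRef> Attrs(Count);
  if (Count)
    // Fills the caller-provided buffer; it must hold at least Count entries.
    LLVMGetAttributesAtIndex(Fn, LLVMAttributeFunctionIndex, Attrs.data());
  return Attrs;
}

The call-site variants (LLVMGetCallSiteAttributeCount/LLVMGetCallSiteAttributes) work the same way on an LLVMValueRef that wraps a call or invoke instruction.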

View File

@ -27,19 +27,24 @@ template<class GraphType>
struct GraphTraits {
// Elements to provide:
// NOTICE: We are in the middle of migrating interfaces that require
// NodeType * over to NodeRef. NodeRef is required to be cheap to copy, but
// does not have to be a raw pointer. During the transition, users should
// define both NodeType and NodeRef = NodeType *.
//
// typedef NodeType - Type of Node in the graph
// typedef NodeRef - NodeType *
// typedef ChildIteratorType - Type used to iterate over children in graph
// static NodeType *getEntryNode(const GraphType &)
// static NodeRef getEntryNode(const GraphType &)
// Return the entry node of the graph
// static ChildIteratorType child_begin(NodeType *)
// static ChildIteratorType child_end (NodeType *)
// static ChildIteratorType child_begin(NodeRef)
// static ChildIteratorType child_end (NodeRef)
// Return iterators that point to the beginning and ending of the child
// node list for the specified node.
//
// typedef ...iterator nodes_iterator;
// static nodes_iterator nodes_begin(GraphType *G)
// static nodes_iterator nodes_end (GraphType *G)
@ -57,7 +62,7 @@ struct GraphTraits {
// your argument to XXX_begin(...) is unknown or needs to have the proper .h
// file #include'd.
//
typedef typename GraphType::UnknownGraphTypeError NodeType;
typedef typename GraphType::UnknownGraphTypeError NodeRef;
};
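During the transition, a specialization provides both typedefs, with NodeRef simply aliasing NodeType *. A hypothetical specialization for a toy adjacency-list graph might look like this (MyNode and MyGraph are invented for illustration):

#include "llvm/ADT/GraphTraits.h"
#include <vector>

struct MyNode { std::vector<MyNode *> Succs; };
struct MyGraph { MyNode *Entry; };

namespace llvm {
template <> struct GraphTraits<MyGraph *> {
  typedef MyNode NodeType;  // legacy typedef, still required for now
  typedef MyNode *NodeRef;  // new: a cheap-to-copy node handle
  typedef std::vector<MyNode *>::iterator ChildIteratorType;

  static NodeRef getEntryNode(MyGraph *G) { return G->Entry; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Succs.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Succs.end(); }
};
} // end namespace llvm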

View File

@ -37,23 +37,22 @@ namespace llvm {
/// build up a vector of nodes in a particular SCC. Note that it is a forward
/// iterator and thus you cannot backtrack or re-visit nodes.
template <class GraphT, class GT = GraphTraits<GraphT>>
class scc_iterator
: public iterator_facade_base<
scc_iterator<GraphT, GT>, std::forward_iterator_tag,
const std::vector<typename GT::NodeType *>, ptrdiff_t> {
typedef typename GT::NodeType NodeType;
class scc_iterator : public iterator_facade_base<
scc_iterator<GraphT, GT>, std::forward_iterator_tag,
const std::vector<typename GT::NodeRef>, ptrdiff_t> {
typedef typename GT::NodeRef NodeRef;
typedef typename GT::ChildIteratorType ChildItTy;
typedef std::vector<NodeType *> SccTy;
typedef std::vector<NodeRef> SccTy;
typedef typename scc_iterator::reference reference;
/// Element of VisitStack during DFS.
struct StackElement {
NodeType *Node; ///< The current node pointer.
NodeRef Node; ///< The current node pointer.
ChildItTy NextChild; ///< The next child, modified inplace during DFS.
unsigned MinVisited; ///< Minimum uplink value of all children of Node.
StackElement(NodeType *Node, const ChildItTy &Child, unsigned Min)
: Node(Node), NextChild(Child), MinVisited(Min) {}
StackElement(NodeRef Node, const ChildItTy &Child, unsigned Min)
: Node(Node), NextChild(Child), MinVisited(Min) {}
bool operator==(const StackElement &Other) const {
return Node == Other.Node &&
@ -67,10 +66,10 @@ class scc_iterator
///
/// nodeVisitNumbers are per-node visit numbers, also used as DFS flags.
unsigned visitNum;
DenseMap<NodeType *, unsigned> nodeVisitNumbers;
DenseMap<NodeRef, unsigned> nodeVisitNumbers;
/// Stack holding nodes of the SCC.
std::vector<NodeType *> SCCNodeStack;
std::vector<NodeRef> SCCNodeStack;
/// The current SCC, retrieved using operator*().
SccTy CurrentSCC;
@ -80,7 +79,7 @@ class scc_iterator
std::vector<StackElement> VisitStack;
/// A single "visit" within the non-recursive DFS traversal.
void DFSVisitOne(NodeType *N);
void DFSVisitOne(NodeRef N);
/// The stack-based DFS traversal; defined below.
void DFSVisitChildren();
@ -88,7 +87,7 @@ class scc_iterator
/// Compute the next SCC using the DFS traversal.
void GetNextSCC();
scc_iterator(NodeType *entryN) : visitNum(0) {
scc_iterator(NodeRef entryN) : visitNum(0) {
DFSVisitOne(entryN);
GetNextSCC();
}
@ -131,7 +130,7 @@ class scc_iterator
/// This informs the \c scc_iterator that the specified \c Old node
/// has been deleted, and \c New is to be used in its place.
void ReplaceNode(NodeType *Old, NodeType *New) {
void ReplaceNode(NodeRef Old, NodeRef New) {
assert(nodeVisitNumbers.count(Old) && "Old not in scc_iterator?");
nodeVisitNumbers[New] = nodeVisitNumbers[Old];
nodeVisitNumbers.erase(Old);
@ -139,7 +138,7 @@ class scc_iterator
};
template <class GraphT, class GT>
void scc_iterator<GraphT, GT>::DFSVisitOne(NodeType *N) {
void scc_iterator<GraphT, GT>::DFSVisitOne(NodeRef N) {
++visitNum;
nodeVisitNumbers[N] = visitNum;
SCCNodeStack.push_back(N);
@ -155,8 +154,8 @@ void scc_iterator<GraphT, GT>::DFSVisitChildren() {
assert(!VisitStack.empty());
while (VisitStack.back().NextChild != GT::child_end(VisitStack.back().Node)) {
// TOS has at least one more child so continue DFS
NodeType *childN = *VisitStack.back().NextChild++;
typename DenseMap<NodeType *, unsigned>::iterator Visited =
NodeRef childN = *VisitStack.back().NextChild++;
typename DenseMap<NodeRef, unsigned>::iterator Visited =
nodeVisitNumbers.find(childN);
if (Visited == nodeVisitNumbers.end()) {
// this node has never been seen.
@ -176,7 +175,7 @@ template <class GraphT, class GT> void scc_iterator<GraphT, GT>::GetNextSCC() {
DFSVisitChildren();
// Pop the leaf on top of the VisitStack.
NodeType *visitingN = VisitStack.back().Node;
NodeRef visitingN = VisitStack.back().Node;
unsigned minVisitNum = VisitStack.back().MinVisited;
assert(VisitStack.back().NextChild == GT::child_end(visitingN));
VisitStack.pop_back();
@ -212,7 +211,7 @@ bool scc_iterator<GraphT, GT>::hasLoop() const {
assert(!CurrentSCC.empty() && "Dereferencing END SCC iterator!");
if (CurrentSCC.size() > 1)
return true;
NodeType *N = CurrentSCC.front();
NodeRef N = CurrentSCC.front();
for (ChildItTy CI = GT::child_begin(N), CE = GT::child_end(N); CI != CE;
++CI)
if (*CI == N)
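Client code written against pointer-based graphs is unchanged by the NodeRef migration. A typical traversal over a function's CFG, as a sketch (scc_begin/scc_end come from this header; GraphTraits<Function *> from llvm/IR/CFG.h):

#include "llvm/ADT/SCCIterator.h"
#include "llvm/IR/CFG.h"
#include "llvm/Support/raw_ostream.h"

void visitSCCs(llvm::Function *F) {
  for (auto I = llvm::scc_begin(F), E = llvm::scc_end(F); I != E; ++I) {
    // NodeRef is BasicBlock * here, so each SCC is a vector of blocks.
    const std::vector<llvm::BasicBlock *> &SCC = *I;
    if (I.hasLoop())
      llvm::errs() << "cyclic SCC of " << SCC.size() << " block(s)\n";
  }
}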

View File

@ -26,10 +26,18 @@
#include <memory>
#include <utility> // for std::pair
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
namespace detail {
template <typename RangeT>
using IterOfRange = decltype(std::begin(std::declval<RangeT>()));
} // End detail namespace
//===----------------------------------------------------------------------===//
// Extra additions to <functional>
@ -235,6 +243,90 @@ auto reverse(
llvm::make_reverse_iterator(std::begin(C)));
}
/// An iterator adaptor that filters the elements of given inner iterators.
///
/// The predicate parameter should be a callable object that accepts the wrapped
/// iterator's reference type and returns a bool. When incrementing or
/// decrementing the iterator, it will call the predicate on each element and
/// skip any where it returns false.
///
/// \code
/// int A[] = { 1, 2, 3, 4 };
/// auto R = make_filter_range(A, [](int N) { return N % 2 == 1; });
/// // R contains { 1, 3 }.
/// \endcode
template <typename WrappedIteratorT, typename PredicateT>
class filter_iterator
: public iterator_adaptor_base<
filter_iterator<WrappedIteratorT, PredicateT>, WrappedIteratorT,
typename std::common_type<
std::forward_iterator_tag,
typename std::iterator_traits<
WrappedIteratorT>::iterator_category>::type> {
using BaseT = iterator_adaptor_base<
filter_iterator<WrappedIteratorT, PredicateT>, WrappedIteratorT,
typename std::common_type<
std::forward_iterator_tag,
typename std::iterator_traits<WrappedIteratorT>::iterator_category>::
type>;
struct PayloadType {
WrappedIteratorT End;
PredicateT Pred;
};
Optional<PayloadType> Payload;
void findNextValid() {
assert(Payload && "Payload should be engaged when findNextValid is called");
while (this->I != Payload->End && !Payload->Pred(*this->I))
BaseT::operator++();
}
// Construct the begin iterator. The begin iterator needs to know where the
// end is, so that it can properly stop when it hits it.
filter_iterator(WrappedIteratorT Begin, WrappedIteratorT End, PredicateT Pred)
: BaseT(std::move(Begin)),
Payload(PayloadType{std::move(End), std::move(Pred)}) {
findNextValid();
}
// Construct the end iterator. It's not incrementable, so Payload doesn't
// have to be engaged.
filter_iterator(WrappedIteratorT End) : BaseT(End) {}
public:
using BaseT::operator++;
filter_iterator &operator++() {
BaseT::operator++();
findNextValid();
return *this;
}
template <typename RT, typename PT>
friend iterator_range<filter_iterator<detail::IterOfRange<RT>, PT>>
make_filter_range(RT &&, PT);
};
/// Convenience function that takes a range of elements and a predicate,
/// and returns a new filter_iterator range.
///
/// FIXME: Currently if RangeT && is an rvalue reference to a temporary, the
/// lifetime of that temporary is not extended by the returned range object;
/// the temporary is dropped on the floor at the end of the full expression
/// that contains this function call.
template <typename RangeT, typename PredicateT>
iterator_range<filter_iterator<detail::IterOfRange<RangeT>, PredicateT>>
make_filter_range(RangeT &&Range, PredicateT Pred) {
using FilterIteratorT =
filter_iterator<detail::IterOfRange<RangeT>, PredicateT>;
return make_range(FilterIteratorT(std::begin(std::forward<RangeT>(Range)),
std::end(std::forward<RangeT>(Range)),
std::move(Pred)),
FilterIteratorT(std::end(std::forward<RangeT>(Range))));
}
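Usage mirrors the example in the class comment above; per the FIXME, the underlying range must outlive the filtered range:

#include "llvm/ADT/STLExtras.h"

int A[] = {1, 2, 3, 4};
auto R = llvm::make_filter_range(A, [](int N) { return N % 2 == 1; });
for (int N : R)
  (void)N; // visits 1, then 3; even elements are skipped by the predicate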
//===----------------------------------------------------------------------===//
// Extra additions to <utility>
//===----------------------------------------------------------------------===//

View File

@ -174,6 +174,7 @@ class Triple {
UnknownEnvironment,
GNU,
GNUABI64,
GNUEABI,
GNUEABIHF,
GNUX32,
@ -476,8 +477,9 @@ class Triple {
bool isGNUEnvironment() const {
EnvironmentType Env = getEnvironment();
return Env == Triple::GNU || Env == Triple::GNUEABI ||
Env == Triple::GNUEABIHF || Env == Triple::GNUX32;
return Env == Triple::GNU || Env == Triple::GNUABI64 ||
Env == Triple::GNUEABI || Env == Triple::GNUEABIHF ||
Env == Triple::GNUX32;
}
/// Checks if the environment could be MSVC.
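With the new enumerator, a MIPS n64 triple now reports a GNU environment. A quick sketch (the exact triple string is illustrative):

#include "llvm/ADT/Triple.h"

llvm::Triple T("mips64-unknown-linux-gnuabi64");
// T.getEnvironment() == llvm::Triple::GNUABI64
bool IsGNU = T.isGNUEnvironment(); // true after this change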

View File

@ -155,7 +155,14 @@ template <
typename T = typename std::iterator_traits<WrappedIteratorT>::value_type,
typename DifferenceTypeT =
typename std::iterator_traits<WrappedIteratorT>::difference_type,
typename PointerT = T *, typename ReferenceT = T &,
typename PointerT = typename std::conditional<
std::is_same<T, typename std::iterator_traits<
WrappedIteratorT>::value_type>::value,
typename std::iterator_traits<WrappedIteratorT>::pointer, T *>::type,
typename ReferenceT = typename std::conditional<
std::is_same<T, typename std::iterator_traits<
WrappedIteratorT>::value_type>::value,
typename std::iterator_traits<WrappedIteratorT>::reference, T &>::type,
// Don't provide these, they are mostly to act as aliases below.
typename WrappedTraitsT = std::iterator_traits<WrappedIteratorT>>
class iterator_adaptor_base
@ -168,15 +175,7 @@ class iterator_adaptor_base
iterator_adaptor_base() = default;
template <typename U>
explicit iterator_adaptor_base(
U &&u,
typename std::enable_if<
!std::is_base_of<typename std::remove_cv<
typename std::remove_reference<U>::type>::type,
DerivedT>::value,
int>::type = 0)
: I(std::forward<U &&>(u)) {}
explicit iterator_adaptor_base(WrappedIteratorT u) : I(std::move(u)) {}
const WrappedIteratorT &wrapped() const { return I; }
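The new defaults matter for proxy-returning iterators: when T is left as the wrapped iterator's value_type, the adaptor now forwards the wrapped traits' pointer and reference types instead of assuming T * and T &. A minimal pass-through adaptor under these defaults (the name is hypothetical):

#include "llvm/ADT/iterator.h"
#include <utility>

template <typename WrappedT>
class passthrough_iterator
    : public llvm::iterator_adaptor_base<passthrough_iterator<WrappedT>,
                                         WrappedT> {
  typedef llvm::iterator_adaptor_base<passthrough_iterator<WrappedT>, WrappedT>
      BaseT;

public:
  // With the new defaults, pointer/reference match the wrapped iterator's
  // traits exactly, so dereferencing behaves identically to the wrapped one.
  explicit passthrough_iterator(WrappedT I) : BaseT(std::move(I)) {}
};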

View File

@ -410,6 +410,7 @@ class CallGraphWrapperPass : public ModulePass {
// traversals.
template <> struct GraphTraits<CallGraphNode *> {
typedef CallGraphNode NodeType;
typedef CallGraphNode *NodeRef;
typedef CallGraphNode::CallRecord CGNPairTy;
typedef std::pointer_to_unary_function<CGNPairTy, CallGraphNode *>
@ -431,6 +432,7 @@ template <> struct GraphTraits<CallGraphNode *> {
template <> struct GraphTraits<const CallGraphNode *> {
typedef const CallGraphNode NodeType;
typedef const CallGraphNode *NodeRef;
typedef CallGraphNode::CallRecord CGNPairTy;
typedef std::pointer_to_unary_function<CGNPairTy, const CallGraphNode *>

View File

@ -196,6 +196,13 @@ namespace llvm {
/// block.
Value *expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I);
/// \brief Insert code to directly compute the specified SCEV expression
/// into the program. The inserted code is inserted into the SCEVExpander's
/// current insertion point. If a type is specified, the result will be
/// expanded to have that type, with a cast if necessary.
Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr);
/// \brief Generates a code sequence that evaluates this predicate.
/// The inserted instructions will be at position \p Loc.
/// The result will be of type i1 and will have a value of 0 when the
@ -253,6 +260,15 @@ namespace llvm {
void enableLSRMode() { LSRMode = true; }
/// \brief Set the current insertion point. This is useful if multiple calls
/// to expandCodeFor() are going to be made with the same insert point and
/// the insert point may be moved during one of the expansions (e.g. if the
/// insert point is not a block terminator).
void setInsertPoint(Instruction *IP) {
assert(IP);
Builder.SetInsertPoint(IP);
}
/// \brief Clear the current insertion point. This is useful if the
/// instruction that had been serving as the insertion point may have been
/// deleted.
@ -313,12 +329,6 @@ namespace llvm {
Value *expand(const SCEV *S);
/// \brief Insert code to directly compute the specified SCEV expression
/// into the program. The inserted code is inserted into the SCEVExpander's
/// current insertion point. If a type is specified, the result will be
/// expanded to have that type, with a cast if necessary.
Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr);
/// \brief Determine the most "relevant" loop for the given SCEV.
const Loop *getRelevantLoop(const SCEV *);
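Together, the now-public expandCodeFor overload and setInsertPoint let a client pin one insertion point across several expansions. A sketch, assuming an existing SCEVExpander Exp, SCEVs S0/S1, a Type *Ty, and an Instruction *InsertPt:

Exp.setInsertPoint(InsertPt);
llvm::Value *V0 = Exp.expandCodeFor(S0, Ty); // expands at the current point
llvm::Value *V1 = Exp.expandCodeFor(S1, Ty); // reuses the tracked point, even
                                             // if the first expansion moved it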

View File

@ -740,6 +740,7 @@ struct MBB2NumberFunctor :
template <> struct GraphTraits<MachineBasicBlock *> {
typedef MachineBasicBlock NodeType;
typedef MachineBasicBlock *NodeRef;
typedef MachineBasicBlock::succ_iterator ChildIteratorType;
static NodeType *getEntryNode(MachineBasicBlock *BB) { return BB; }
@ -753,6 +754,7 @@ template <> struct GraphTraits<MachineBasicBlock *> {
template <> struct GraphTraits<const MachineBasicBlock *> {
typedef const MachineBasicBlock NodeType;
typedef const MachineBasicBlock *NodeRef;
typedef MachineBasicBlock::const_succ_iterator ChildIteratorType;
static NodeType *getEntryNode(const MachineBasicBlock *BB) { return BB; }
@ -772,6 +774,7 @@ template <> struct GraphTraits<const MachineBasicBlock *> {
//
template <> struct GraphTraits<Inverse<MachineBasicBlock*> > {
typedef MachineBasicBlock NodeType;
typedef MachineBasicBlock *NodeRef;
typedef MachineBasicBlock::pred_iterator ChildIteratorType;
static NodeType *getEntryNode(Inverse<MachineBasicBlock *> G) {
return G.Graph;
@ -786,6 +789,7 @@ template <> struct GraphTraits<Inverse<MachineBasicBlock*> > {
template <> struct GraphTraits<Inverse<const MachineBasicBlock*> > {
typedef const MachineBasicBlock NodeType;
typedef const MachineBasicBlock *NodeRef;
typedef MachineBasicBlock::const_pred_iterator ChildIteratorType;
static NodeType *getEntryNode(Inverse<const MachineBasicBlock*> G) {
return G.Graph;

View File

@ -210,6 +210,7 @@ class AttributeSet {
private:
friend class AttrBuilder;
friend class AttributeSetImpl;
friend class AttributeSetNode;
template <typename Ty> friend struct DenseMapInfo;
/// \brief The attributes that we are managing. This can be null to represent

View File

@ -155,6 +155,7 @@ struct isPodLike<TerminatorInst::SuccIterator<T, U>> {
template <> struct GraphTraits<BasicBlock*> {
typedef BasicBlock NodeType;
typedef BasicBlock *NodeRef;
typedef succ_iterator ChildIteratorType;
static NodeType *getEntryNode(BasicBlock *BB) { return BB; }
@ -168,6 +169,7 @@ template <> struct GraphTraits<BasicBlock*> {
template <> struct GraphTraits<const BasicBlock*> {
typedef const BasicBlock NodeType;
typedef const BasicBlock *NodeRef;
typedef succ_const_iterator ChildIteratorType;
static NodeType *getEntryNode(const BasicBlock *BB) { return BB; }
@ -187,6 +189,7 @@ template <> struct GraphTraits<const BasicBlock*> {
//
template <> struct GraphTraits<Inverse<BasicBlock*> > {
typedef BasicBlock NodeType;
typedef BasicBlock *NodeRef;
typedef pred_iterator ChildIteratorType;
static NodeType *getEntryNode(Inverse<BasicBlock *> G) { return G.Graph; }
static inline ChildIteratorType child_begin(NodeType *N) {
@ -199,6 +202,7 @@ template <> struct GraphTraits<Inverse<BasicBlock*> > {
template <> struct GraphTraits<Inverse<const BasicBlock*> > {
typedef const BasicBlock NodeType;
typedef const BasicBlock *NodeRef;
typedef const_pred_iterator ChildIteratorType;
static NodeType *getEntryNode(Inverse<const BasicBlock*> G) {
return G.Graph;

View File

@ -479,6 +479,8 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_cvtps2dq : GCCBuiltin<"__builtin_ia32_cvtps2dq">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse2_cvttps2dq : GCCBuiltin<"__builtin_ia32_cvttps2dq">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse2_cvtsd2si : GCCBuiltin<"__builtin_ia32_cvtsd2si">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_cvtsd2si64 : GCCBuiltin<"__builtin_ia32_cvtsd2si64">,
@ -1512,8 +1514,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_cvt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvtps2dq256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_avx_cvtt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2dq256">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_cvt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2dq256">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_cvtt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvttps2dq256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
}
// Vector bit test

View File

@ -2349,6 +2349,10 @@ class TargetLowering : public TargetLoweringBase {
/// from getBooleanContents().
bool isConstFalseVal(const SDNode *N) const;
/// Return a constant of type VT that contains a true value that respects
/// getBooleanContents()
SDValue getConstTrueVal(SelectionDAG &DAG, EVT VT, const SDLoc &DL) const;
/// Return if \p N is a True value when extended to \p VT.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool Signed) const;

View File

@ -623,6 +623,7 @@ template <> struct GraphTraits<IrreducibleGraph> {
typedef bfi_detail::IrreducibleGraph GraphT;
typedef const GraphT::IrrNode NodeType;
typedef const GraphT::IrrNode *NodeRef;
typedef GraphT::IrrNode::iterator ChildIteratorType;
static const NodeType *getEntryNode(const GraphT &G) {

View File

@ -1424,8 +1424,8 @@ Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), double V,
/// integer type Ty is used to select how many bits are available for the
/// result. Returns null if the conversion cannot be performed, otherwise
/// returns the Constant value resulting from the conversion.
Constant *ConstantFoldConvertToInt(const APFloat &Val, bool roundTowardZero,
Type *Ty) {
Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
Type *Ty) {
// All of these conversion intrinsics form an integer of at most 64 bits.
unsigned ResultWidth = Ty->getIntegerBitWidth();
assert(ResultWidth <= 64 &&
@ -1438,7 +1438,8 @@ Constant *ConstantFoldConvertToInt(const APFloat &Val, bool roundTowardZero,
APFloat::opStatus status = Val.convertToInteger(&UIntVal, ResultWidth,
/*isSigned=*/true, mode,
&isExact);
if (status != APFloat::opOK && status != APFloat::opInexact)
if (status != APFloat::opOK &&
(!roundTowardZero || status != APFloat::opInexact))
return nullptr;
return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true);
}
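The extra condition separates the truncating cvtt* intrinsics from the rounding cvt* ones: an inexact conversion is only foldable when the instruction itself always truncates; cvtsd2si and friends honor the runtime rounding mode (MXCSR), which is unknown at compile time. A sketch of the underlying APFloat call, mirroring the code above:

llvm::APFloat Val(2.7);
uint64_t UIntVal;
bool IsExact;
llvm::APFloat::opStatus S = Val.convertToInteger(
    &UIntVal, /*Width=*/32, /*isSigned=*/true,
    llvm::APFloat::rmTowardZero, &IsExact);
// S == opInexact and UIntVal == 2: fine to fold for cvttsd2si (it always
// truncates), but rejected for cvtsd2si, whose result depends on MXCSR.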
@ -1676,17 +1677,17 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
case Intrinsic::x86_sse2_cvtsd2si:
case Intrinsic::x86_sse2_cvtsd2si64:
if (ConstantFP *FPOp =
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldConvertToInt(FPOp->getValueAPF(),
/*roundTowardZero=*/false, Ty);
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
/*roundTowardZero=*/false, Ty);
case Intrinsic::x86_sse_cvttss2si:
case Intrinsic::x86_sse_cvttss2si64:
case Intrinsic::x86_sse2_cvttsd2si:
case Intrinsic::x86_sse2_cvttsd2si64:
if (ConstantFP *FPOp =
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldConvertToInt(FPOp->getValueAPF(),
/*roundTowardZero=*/true, Ty);
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
/*roundTowardZero=*/true, Ty);
}
}

View File

@ -3400,7 +3400,10 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
return TrueVal;
if (const auto *ICI = dyn_cast<ICmpInst>(CondVal)) {
unsigned BitWidth = Q.DL.getTypeSizeInBits(TrueVal->getType());
// FIXME: This code is nearly duplicated in InstCombine. Using/refactoring
// decomposeBitTestICmp() might help.
unsigned BitWidth =
Q.DL.getTypeSizeInBits(TrueVal->getType()->getScalarType());
ICmpInst::Predicate Pred = ICI->getPredicate();
Value *CmpLHS = ICI->getOperand(0);
Value *CmpRHS = ICI->getOperand(1);
@ -4274,7 +4277,8 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
// Gracefully handle edge cases where the instruction is not wired into any
// parent block.
if (I->getParent())
if (I->getParent() && !I->isEHPad() && !isa<TerminatorInst>(I) &&
!I->mayHaveSideEffects())
I->eraseFromParent();
} else {
Worklist.insert(I);
@ -4302,7 +4306,8 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
// Gracefully handle edge cases where the instruction is not wired into any
// parent block.
if (I->getParent())
if (I->getParent() && !I->isEHPad() && !isa<TerminatorInst>(I) &&
!I->mayHaveSideEffects())
I->eraseFromParent();
}
return Simplified;

View File

@ -115,13 +115,19 @@ bool UnrolledInstAnalyzer::visitLoad(LoadInst &I) {
// We might have a vector load from an array. FIXME: for now we just bail
// out in this case, but we should be able to resolve and simplify such
// loads.
if(CDS->getElementType() != I.getType())
if (CDS->getElementType() != I.getType())
return false;
int ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U;
if (SimplifiedAddrOp->getValue().getActiveBits() >= 64)
unsigned ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U;
if (SimplifiedAddrOp->getValue().getActiveBits() > 64)
return false;
int64_t Index = SimplifiedAddrOp->getSExtValue() / ElemSize;
int64_t SimplifiedAddrOpV = SimplifiedAddrOp->getSExtValue();
if (SimplifiedAddrOpV < 0) {
// FIXME: For now we conservatively ignore out of bound accesses, but
// we're allowed to perform the optimization in this case.
return false;
}
uint64_t Index = static_cast<uint64_t>(SimplifiedAddrOpV) / ElemSize;
if (Index >= CDS->getNumElements()) {
// FIXME: For now we conservatively ignore out of bound accesses, but
// we're allowed to perform the optimization in this case.

View File

@ -1610,8 +1610,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
Instruction *IP) {
assert(IP);
Builder.SetInsertPoint(IP);
setInsertPoint(IP);
return expandCodeFor(SH, Ty);
}

View File

@ -214,10 +214,7 @@ TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) {
}
TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
// It's possible to ask for the FuncId of a function which doesn't have a
// subprogram: inlining a function with debug info into a function with none.
if (!SP)
return TypeIndex::None();
assert(SP);
// Check if we've already translated this subprogram.
auto I = TypeIndices.find({SP, nullptr});
@ -621,11 +618,12 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
std::string FuncName;
auto *SP = GV->getSubprogram();
assert(SP);
setCurrentSubprogram(SP);
// If we have a display name, build the fully qualified name by walking the
// chain of scopes.
if (SP != nullptr && !SP->getDisplayName().empty())
if (!SP->getDisplayName().empty())
FuncName =
getFullyQualifiedName(SP->getScope().resolve(), SP->getDisplayName());
@ -864,7 +862,7 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
void CodeViewDebug::beginFunction(const MachineFunction *MF) {
assert(!CurFn && "Can't process two functions at once!");
if (!Asm || !MMI->hasDebugInfo())
if (!Asm || !MMI->hasDebugInfo() || !MF->getFunction()->getSubprogram())
return;
DebugHandlerBase::beginFunction(MF);
@ -1939,7 +1937,8 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
DebugHandlerBase::beginInstruction(MI);
// Ignore DBG_VALUE locations and function prologue.
if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup))
if (!Asm || !CurFn || MI->isDebugValue() ||
MI->getFlag(MachineInstr::FrameSetup))
return;
DebugLoc DL = MI->getDebugLoc();
if (DL == PrevInstLoc || !DL)

View File

@ -996,6 +996,24 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
MachineBasicBlock *IBB = &*I;
MachineBasicBlock *PredBB = &*std::prev(I);
MergePotentials.clear();
MachineLoop *ML;
// Bail if merging after placement and IBB is the loop header, because:
// -- If the merged predecessors belong to the same loop as IBB, the common
// tail of the merged predecessors may become the loop top if block placement
// is called again, and the predecessors may branch to this common tail and
// require more branches. This can be relaxed if
// MachineBlockPlacement::findBestLoopTop becomes more flexible.
// -- If the merged predecessors do not belong to the same loop as IBB, the
// loop info of IBB's loop and the other loops may be affected. Calling block
// placement again may change the layout significantly and eliminate the
// reason to do tail merging here.
if (AfterBlockPlacement && MLI) {
ML = MLI->getLoopFor(IBB);
if (ML && IBB == ML->getHeader())
continue;
}
for (MachineBasicBlock *PBB : I->predecessors()) {
if (MergePotentials.size() == TailMergeThreshold)
break;
@ -1015,16 +1033,12 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (PBB->hasEHPadSuccessor())
continue;
// Bail out if the loop header (IBB) is not the top of the loop chain
// after the block placement. Otherwise, the common tail of IBB's
// predecessors may become the loop top if block placement is called again
// and the predecessors may branch to this common tail.
// FIXME: Relaxed this check if the algorithm of finding loop top is
// changed in MBP.
// After block placement, only consider predecessors that belong to the
// same loop as IBB. The reason is the same as above when skipping loop
// header.
if (AfterBlockPlacement && MLI)
if (MachineLoop *ML = MLI->getLoopFor(IBB))
if (IBB == ML->getHeader() && ML == MLI->getLoopFor(PBB))
continue;
if (ML != MLI->getLoopFor(PBB))
continue;
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;

View File

@ -530,7 +530,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
unsigned Align =
std::max(DL->getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment());
SSL.addObject(StackGuardSlot, getStaticAllocaAllocationSize(StackGuardSlot),
Align, SSC.getLiveRange(StackGuardSlot));
Align, SSC.getFullLiveRange());
}
for (Argument *Arg : ByValArguments) {

View File

@ -25,7 +25,9 @@ static cl::opt<bool> ClColoring("safe-stack-coloring",
cl::Hidden, cl::init(true));
const StackColoring::LiveRange &StackColoring::getLiveRange(AllocaInst *AI) {
return LiveRanges[AllocaNumbering[AI]];
const auto IT = AllocaNumbering.find(AI);
assert(IT != AllocaNumbering.end());
return LiveRanges[IT->second];
}
bool StackColoring::readMarker(Instruction *I, bool *IsStart) {

View File

@ -100,7 +100,8 @@ void StackLayout::layoutObject(StackObject &Obj) {
}
// Split starting and ending regions if necessary.
for (StackRegion &R : Regions) {
for (unsigned i = 0; i < Regions.size(); ++i) {
StackRegion &R = Regions[i];
if (Start > R.Start && Start < R.End) {
StackRegion R0 = R;
R.Start = R0.End = Start;
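The switch from range-for to an index loop is deliberate: the splitting code inserts new entries into Regions, which can reallocate the vector's storage and invalidate the reference a range-for holds. Indexing re-fetches the element on every pass. A generic illustration of the hazard:

#include <vector>

std::vector<int> V = {1, 2, 3};
// Unsafe: push_back may reallocate and invalidate the range-for iterators.
//   for (int &X : V) if (X == 2) V.push_back(4);
// Safe: an index stays valid across reallocation, and elements appended
// during the loop are visited too, since V.size() is re-evaluated.
for (unsigned i = 0; i < V.size(); ++i)
  if (V[i] == 2)
    V.push_back(4);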

View File

@ -6198,13 +6198,27 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
}
// sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
unsigned ElementWidth = VT.getScalarType().getSizeInBits();
// sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
// Here, T can be 1 or -1, depending on the type of the setcc and
// getBooleanContents().
unsigned SetCCWidth = N0.getValueType().getScalarSizeInBits();
SDLoc DL(N);
SDValue NegOne =
DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT);
// To determine the "true" side of the select, we need to know the high bit
// of the value returned by the setcc if it evaluates to true.
// If the type of the setcc is i1, then the true case of the select is just
// sext(i1 1), that is, -1.
// If the type of the setcc is larger (say, i8) then the value of the high
// bit depends on getBooleanContents(). So, ask TLI for a real "true" value
// of the appropriate width.
SDValue ExtTrueVal =
(SetCCWidth == 1)
? DAG.getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()),
DL, VT)
: TLI.getConstTrueVal(DAG, VT, DL);
if (SDValue SCC = SimplifySelectCC(
DL, N0.getOperand(0), N0.getOperand(1), NegOne,
DL, N0.getOperand(0), N0.getOperand(1), ExtTrueVal,
DAG.getConstant(0, DL, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
return SCC;
@ -6215,10 +6229,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
SDLoc DL(N);
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
N0.getOperand(0), N0.getOperand(1), CC);
return DAG.getSelect(DL, VT, SetCC,
NegOne, DAG.getConstant(0, DL, VT));
SDValue SetCC =
DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC);
return DAG.getSelect(DL, VT, SetCC, ExtTrueVal,
DAG.getConstant(0, DL, VT));
}
}
}

View File

@ -6639,19 +6639,26 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
SDNode *FromNode = From.getNode();
SDNode *ToNode = To.getNode();
ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode);
SmallVector<SDDbgValue *, 2> ClonedDVs;
for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
I != E; ++I) {
SDDbgValue *Dbg = *I;
// Only add Dbgvalues attached to same ResNo.
if (Dbg->getKind() == SDDbgValue::SDNODE &&
Dbg->getResNo() == From.getResNo()) {
Dbg->getSDNode() == From.getNode() &&
Dbg->getResNo() == From.getResNo() && !Dbg->isInvalidated()) {
assert(FromNode != ToNode &&
"Should not transfer Debug Values intranode");
SDDbgValue *Clone =
getDbgValue(Dbg->getVariable(), Dbg->getExpression(), ToNode,
To.getResNo(), Dbg->isIndirect(), Dbg->getOffset(),
Dbg->getDebugLoc(), Dbg->getOrder());
AddDbgValue(Clone, ToNode, false);
ClonedDVs.push_back(Clone);
Dbg->setIsInvalidated();
}
}
for (SDDbgValue *I : ClonedDVs)
AddDbgValue(I, ToNode, false);
}
//===----------------------------------------------------------------------===//

View File

@ -1234,6 +1234,16 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
llvm_unreachable("Invalid boolean contents");
}
SDValue TargetLowering::getConstTrueVal(SelectionDAG &DAG, EVT VT,
const SDLoc &DL) const {
unsigned ElementWidth = VT.getScalarSizeInBits();
APInt TrueInt =
getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent
? APInt(ElementWidth, 1)
: APInt::getAllOnesValue(ElementWidth);
return DAG.getConstant(TrueInt, DL, VT);
}
bool TargetLowering::isConstFalseVal(const SDNode *N) const {
if (!N)
return false;

View File

@ -29,7 +29,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
@ -539,6 +539,16 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
return TRI->regsOverlap(RegA, RegB);
}
// Returns true if Reg is equal to or aliases at least one register in Set.
static bool regOverlapsSet(const SmallVectorImpl<unsigned> &Set, unsigned Reg,
const TargetRegisterInfo *TRI) {
for (unsigned R : Set)
if (TRI->regsOverlap(R, Reg))
return true;
return false;
}
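regOverlapsSet replaces exact-match lookups (SmallSet::count) below because physical registers alias: a def of a super-register also clobbers its sub-registers, which a plain membership test misses. An illustrative call, with hypothetical X86 registers and an existing TargetRegisterInfo *TRI:

llvm::SmallVector<unsigned, 2> Defs;
Defs.push_back(X86::EAX); // a 32-bit def also clobbers AX, AH, AL
bool Clobbered = regOverlapsSet(Defs, X86::AX, TRI); // true: AX aliases EAX
// A SmallSet<unsigned, 2> would report count(X86::AX) == 0 here.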
/// Return true if it's potentially profitable to commute the two-address
/// instruction that's being processed.
bool
@ -864,9 +874,9 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
// FIXME: Needs more sophisticated heuristics.
return false;
SmallSet<unsigned, 2> Uses;
SmallSet<unsigned, 2> Kills;
SmallSet<unsigned, 2> Defs;
SmallVector<unsigned, 2> Uses;
SmallVector<unsigned, 2> Kills;
SmallVector<unsigned, 2> Defs;
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
@ -874,12 +884,12 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
if (!MOReg)
continue;
if (MO.isDef())
Defs.insert(MOReg);
Defs.push_back(MOReg);
else {
Uses.insert(MOReg);
Uses.push_back(MOReg);
if (MOReg != Reg && (MO.isKill() ||
(LIS && isPlainlyKilled(MI, MOReg, LIS))))
Kills.insert(MOReg);
Kills.push_back(MOReg);
}
}
@ -888,8 +898,9 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator AfterMI = std::next(Begin);
MachineBasicBlock::iterator End = AfterMI;
while (End->isCopy() && Defs.count(End->getOperand(1).getReg())) {
Defs.insert(End->getOperand(0).getReg());
while (End->isCopy() &&
regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI)) {
Defs.push_back(End->getOperand(0).getReg());
++End;
}
@ -915,21 +926,21 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
if (!MOReg)
continue;
if (MO.isDef()) {
if (Uses.count(MOReg))
if (regOverlapsSet(Uses, MOReg, TRI))
// Physical register use would be clobbered.
return false;
if (!MO.isDead() && Defs.count(MOReg))
if (!MO.isDead() && regOverlapsSet(Defs, MOReg, TRI))
// May clobber a physical register def.
// FIXME: This may be too conservative. It's ok if the instruction
// is sunken completely below the use.
return false;
} else {
if (Defs.count(MOReg))
if (regOverlapsSet(Defs, MOReg, TRI))
return false;
bool isKill =
MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS));
if (MOReg != Reg &&
((isKill && Uses.count(MOReg)) || Kills.count(MOReg)))
if (MOReg != Reg && ((isKill && regOverlapsSet(Uses, MOReg, TRI)) ||
regOverlapsSet(Kills, MOReg, TRI)))
// Don't want to extend other live ranges and update kills.
return false;
if (MOReg == Reg && !isKill)

View File

@ -19,8 +19,8 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/IR/Attributes.h"
#include "AttributeSetNode.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/TrailingObjects.h"
#include <climits>
#include <string>
@ -142,73 +142,6 @@ class StringAttributeImpl : public AttributeImpl {
StringRef getStringValue() const { return Val; }
};
//===----------------------------------------------------------------------===//
/// \class
/// \brief This class represents a group of attributes that apply to one
/// element: function, return type, or parameter.
class AttributeSetNode final
: public FoldingSetNode,
private TrailingObjects<AttributeSetNode, Attribute> {
friend TrailingObjects;
unsigned NumAttrs; ///< Number of attributes in this node.
/// Bitset with a bit for each available attribute Attribute::AttrKind.
uint64_t AvailableAttrs;
AttributeSetNode(ArrayRef<Attribute> Attrs)
: NumAttrs(Attrs.size()), AvailableAttrs(0) {
static_assert(Attribute::EndAttrKinds <= sizeof(AvailableAttrs) * CHAR_BIT,
"Too many attributes for AvailableAttrs");
// There's memory after the node where we can store the entries in.
std::copy(Attrs.begin(), Attrs.end(), getTrailingObjects<Attribute>());
for (Attribute I : *this) {
if (!I.isStringAttribute()) {
AvailableAttrs |= ((uint64_t)1) << I.getKindAsEnum();
}
}
}
// AttributeSetNode is uniqued, these should not be publicly available.
void operator=(const AttributeSetNode &) = delete;
AttributeSetNode(const AttributeSetNode &) = delete;
public:
void operator delete(void *p) { ::operator delete(p); }
static AttributeSetNode *get(LLVMContext &C, ArrayRef<Attribute> Attrs);
/// \brief Return the number of attributes this AttributeSet contains.
unsigned getNumAttributes() const { return NumAttrs; }
bool hasAttribute(Attribute::AttrKind Kind) const {
return AvailableAttrs & ((uint64_t)1) << Kind;
}
bool hasAttribute(StringRef Kind) const;
bool hasAttributes() const { return NumAttrs != 0; }
Attribute getAttribute(Attribute::AttrKind Kind) const;
Attribute getAttribute(StringRef Kind) const;
unsigned getAlignment() const;
unsigned getStackAlignment() const;
uint64_t getDereferenceableBytes() const;
uint64_t getDereferenceableOrNullBytes() const;
std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
std::string getAsString(bool InAttrGrp) const;
typedef const Attribute *iterator;
iterator begin() const { return getTrailingObjects<Attribute>(); }
iterator end() const { return begin() + NumAttrs; }
void Profile(FoldingSetNodeID &ID) const {
Profile(ID, makeArrayRef(begin(), end()));
}
static void Profile(FoldingSetNodeID &ID, ArrayRef<Attribute> AttrList) {
for (unsigned I = 0, E = AttrList.size(); I != E; ++I)
AttrList[I].Profile(ID);
}
};
typedef std::pair<unsigned, AttributeSetNode *> IndexAttrPair;
//===----------------------------------------------------------------------===//

View File

@ -0,0 +1,98 @@
//===-- AttributeSetNode.h - AttributeSet Internal Node ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file defines the node class used internally by AttributeSet.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_IR_ATTRIBUTESETNODE_H
#define LLVM_IR_ATTRIBUTESETNODE_H
#include "llvm/ADT/FoldingSet.h"
#include "llvm/IR/Attributes.h"
#include "llvm/Support/TrailingObjects.h"
#include <climits>
namespace llvm {
//===----------------------------------------------------------------------===//
/// \class
/// \brief This class represents a group of attributes that apply to one
/// element: function, return type, or parameter.
class AttributeSetNode final
: public FoldingSetNode,
private TrailingObjects<AttributeSetNode, Attribute> {
friend TrailingObjects;
unsigned NumAttrs; ///< Number of attributes in this node.
/// Bitset with a bit for each available attribute Attribute::AttrKind.
uint64_t AvailableAttrs;
AttributeSetNode(ArrayRef<Attribute> Attrs)
: NumAttrs(Attrs.size()), AvailableAttrs(0) {
static_assert(Attribute::EndAttrKinds <= sizeof(AvailableAttrs) * CHAR_BIT,
"Too many attributes for AvailableAttrs");
// There's memory after the node where we can store the entries in.
std::copy(Attrs.begin(), Attrs.end(), getTrailingObjects<Attribute>());
for (Attribute I : *this) {
if (!I.isStringAttribute()) {
AvailableAttrs |= ((uint64_t)1) << I.getKindAsEnum();
}
}
}
// AttributeSetNode is uniqued, these should not be publicly available.
void operator=(const AttributeSetNode &) = delete;
AttributeSetNode(const AttributeSetNode &) = delete;
public:
void operator delete(void *p) { ::operator delete(p); }
static AttributeSetNode *get(LLVMContext &C, ArrayRef<Attribute> Attrs);
static AttributeSetNode *get(AttributeSet AS, unsigned Index) {
return AS.getAttributes(Index);
}
/// \brief Return the number of attributes this AttributeSet contains.
unsigned getNumAttributes() const { return NumAttrs; }
bool hasAttribute(Attribute::AttrKind Kind) const {
return AvailableAttrs & ((uint64_t)1) << Kind;
}
bool hasAttribute(StringRef Kind) const;
bool hasAttributes() const { return NumAttrs != 0; }
Attribute getAttribute(Attribute::AttrKind Kind) const;
Attribute getAttribute(StringRef Kind) const;
unsigned getAlignment() const;
unsigned getStackAlignment() const;
uint64_t getDereferenceableBytes() const;
uint64_t getDereferenceableOrNullBytes() const;
std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
std::string getAsString(bool InAttrGrp) const;
typedef const Attribute *iterator;
iterator begin() const { return getTrailingObjects<Attribute>(); }
iterator end() const { return begin() + NumAttrs; }
void Profile(FoldingSetNodeID &ID) const {
Profile(ID, makeArrayRef(begin(), end()));
}
static void Profile(FoldingSetNodeID &ID, ArrayRef<Attribute> AttrList) {
for (unsigned I = 0, E = AttrList.size(); I != E; ++I)
AttrList[I].Profile(ID);
}
};
} // end llvm namespace
#endif

View File

@ -251,8 +251,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "sse2.cvtps2pd" ||
Name == "avx.cvtdq2.pd.256" ||
Name == "avx.cvt.ps2.pd.256" ||
Name == "sse2.cvttps2dq" ||
Name.startswith("avx.cvtt.") ||
Name.startswith("avx.vinsertf128.") ||
Name == "avx2.vinserti128" ||
Name.startswith("avx.vextractf128.") ||
@ -712,12 +710,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
else
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
} else if (IsX86 && (Name == "sse2.cvttps2dq" ||
Name.startswith("avx.cvtt."))) {
// Truncation (round to zero) float/double to i32 vector conversion.
Value *Src = CI->getArgOperand(0);
VectorType *DstTy = cast<VectorType>(CI->getType());
Rep = Builder.CreateFPToSI(Src, DstTy, "cvtt");
} else if (IsX86 && Name.startswith("sse4a.movnt.")) {
Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;

View File

@ -16,6 +16,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/Attributes.h"
#include "AttributeSetNode.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@ -1844,6 +1845,18 @@ void LLVMAddAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
unwrap<Function>(F)->addAttribute(Idx, unwrap(A));
}
unsigned LLVMGetAttributeCountAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx) {
auto *ASN = AttributeSetNode::get(unwrap<Function>(F)->getAttributes(), Idx);
return ASN->getNumAttributes();
}
void LLVMGetAttributesAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
LLVMAttributeRef *Attrs) {
auto *ASN = AttributeSetNode::get(unwrap<Function>(F)->getAttributes(), Idx);
for (auto A: make_range(ASN->begin(), ASN->end()))
*Attrs++ = wrap(A);
}
LLVMAttributeRef LLVMGetEnumAttributeAtIndex(LLVMValueRef F,
LLVMAttributeIndex Idx,
unsigned KindID) {
@ -2216,6 +2229,21 @@ void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
CallSite(unwrap<Instruction>(C)).addAttribute(Idx, unwrap(A));
}
unsigned LLVMGetCallSiteAttributeCount(LLVMValueRef C,
LLVMAttributeIndex Idx) {
auto CS = CallSite(unwrap<Instruction>(C));
auto *ASN = AttributeSetNode::get(CS.getAttributes(), Idx);
return ASN->getNumAttributes();
}
void LLVMGetCallSiteAttributes(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef *Attrs) {
auto CS = CallSite(unwrap<Instruction>(C));
auto *ASN = AttributeSetNode::get(CS.getAttributes(), Idx);
for (auto A: make_range(ASN->begin(), ASN->end()))
*Attrs++ = wrap(A);
}
LLVMAttributeRef LLVMGetCallSiteEnumAttribute(LLVMValueRef C,
LLVMAttributeIndex Idx,
unsigned KindID) {

View File

@ -675,8 +675,8 @@ void MDNode::handleChangedOperand(void *Ref, Metadata *New) {
Metadata *Old = getOperand(Op);
setOperand(Op, New);
// Drop uniquing for self-reference cycles.
if (New == this) {
// Drop uniquing for self-reference cycles and deleted constants.
if (New == this || (!New && Old && isa<ConstantAsMetadata>(Old))) {
if (!isResolved())
resolve();
storeDistinctInContext();

View File

@ -201,6 +201,7 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
switch (Kind) {
case UnknownEnvironment: return "unknown";
case GNU: return "gnu";
case GNUABI64: return "gnuabi64";
case GNUEABIHF: return "gnueabihf";
case GNUEABI: return "gnueabi";
case GNUX32: return "gnux32";
@ -468,6 +469,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
return StringSwitch<Triple::EnvironmentType>(EnvironmentName)
.StartsWith("eabihf", Triple::EABIHF)
.StartsWith("eabi", Triple::EABI)
.StartsWith("gnuabi64", Triple::GNUABI64)
.StartsWith("gnueabihf", Triple::GNUEABIHF)
.StartsWith("gnueabi", Triple::GNUEABI)
.StartsWith("gnux32", Triple::GNUX32)

View File

@ -250,6 +250,7 @@ def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan",
FeatureMacroOpFusion,
FeatureNEON,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
HasV8_1aOps]>;
def : ProcessorModel<"generic", NoSchedModel, [

View File

@ -7685,6 +7685,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
/// Fold a floating-point multiply by power of two into floating-point to
/// fixed-point conversion.
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (!Subtarget->hasNEON())
return SDValue();
@ -7728,10 +7729,16 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
break;
case 4:
ResTy = MVT::v4i32;
ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
break;
}
if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
return SDValue();
assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) &&
"Illegal vector type after legalization");
SDLoc DL(N);
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
@ -9853,7 +9860,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performIntToFpCombine(N, DAG, Subtarget);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return performFpToIntCombine(N, DAG, Subtarget);
return performFpToIntCombine(N, DAG, DCI, Subtarget);
case ISD::FDIV:
return performFDivCombine(N, DAG, Subtarget);
case ISD::OR:

View File

@ -20,6 +20,7 @@ class AMDGPUInstrPrinter;
class AMDGPUSubtarget;
class AMDGPUTargetMachine;
class FunctionPass;
class GCNTargetMachine;
struct MachineSchedContext;
class MCAsmInfo;
class raw_ostream;
@ -50,7 +51,7 @@ FunctionPass *createSIFixSGPRCopiesPass();
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSIDebuggerInsertNopsPass();
FunctionPass *createSIInsertWaitsPass();
FunctionPass *createAMDGPUCodeGenPreparePass(const TargetMachine *TM = nullptr);
FunctionPass *createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM = nullptr);
ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C);

View File

@ -783,15 +783,19 @@ void AMDGPUAsmPrinter::emitStartOfRuntimeMetadata(const Module &M) {
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyMDVersion,
RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2);
if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguage,
RuntimeMD::OpenCL_C, 1);
auto Node = MD->getOperand(0);
unsigned short Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
->getZExtValue();
unsigned short Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
->getZExtValue();
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguageVersion,
Major * 100 + Minor * 10, 2);
if (MD->getNumOperands()) {
auto Node = MD->getOperand(0);
if (Node->getNumOperands() > 1) {
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguage,
RuntimeMD::OpenCL_C, 1);
uint16_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
->getZExtValue();
uint16_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
->getZExtValue();
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguageVersion,
Major * 100 + Minor * 10, 2);
}
}
}
}

View File

@ -14,7 +14,9 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/CodeGen/Passes.h"
@ -30,15 +32,28 @@ using namespace llvm;
namespace {
class AMDGPUCodeGenPrepare : public FunctionPass,
public InstVisitor<AMDGPUCodeGenPrepare> {
public InstVisitor<AMDGPUCodeGenPrepare, bool> {
const GCNTargetMachine *TM;
const SISubtarget *ST;
DivergenceAnalysis *DA;
const TargetMachine *TM;
Module *Mod;
bool HasUnsafeFPMath;
public:
static char ID;
AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :
FunctionPass(ID),
TM(TM) { }
TM(static_cast<const GCNTargetMachine *>(TM)),
ST(nullptr),
DA(nullptr),
Mod(nullptr),
HasUnsafeFPMath(false) { }
bool visitFDiv(BinaryOperator &I);
bool visitInstruction(Instruction &I) {
return false;
}
bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
@ -55,7 +70,92 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
} // End anonymous namespace
static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) {
const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
if (!CNum)
return false;
// Reciprocal f32 is handled separately without denormals.
return UnsafeDiv || CNum->isExactlyValue(+1.0);
}
// Insert an intrinsic for fast fdiv for safe math situations where we can
// reduce precision. Leave fdiv for situations where the generic node is
// expected to be optimized.
bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
Type *Ty = FDiv.getType();
// TODO: Handle half
if (!Ty->getScalarType()->isFloatTy())
return false;
MDNode *FPMath = FDiv.getMetadata(LLVMContext::MD_fpmath);
if (!FPMath)
return false;
const FPMathOperator *FPOp = cast<const FPMathOperator>(&FDiv);
float ULP = FPOp->getFPAccuracy();
if (ULP < 2.5f)
return false;
FastMathFlags FMF = FPOp->getFastMathFlags();
bool UnsafeDiv = HasUnsafeFPMath || FMF.unsafeAlgebra() ||
FMF.allowReciprocal();
if (ST->hasFP32Denormals() && !UnsafeDiv)
return false;
IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
Builder.setFastMathFlags(FMF);
Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());
const AMDGPUIntrinsicInfo *II = TM->getIntrinsicInfo();
Function *Decl
= II->getDeclaration(Mod, AMDGPUIntrinsic::amdgcn_fdiv_fast, {});
Value *Num = FDiv.getOperand(0);
Value *Den = FDiv.getOperand(1);
Value *NewFDiv = nullptr;
if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
NewFDiv = UndefValue::get(VT);
// FIXME: Doesn't do the right thing for cases where the vector is partially
// constant. This works when the scalarizer pass is run first.
for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) {
Value *NumEltI = Builder.CreateExtractElement(Num, I);
Value *DenEltI = Builder.CreateExtractElement(Den, I);
Value *NewElt;
if (shouldKeepFDivF32(NumEltI, UnsafeDiv)) {
NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
} else {
NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI });
}
NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
}
} else {
if (!shouldKeepFDivF32(Num, UnsafeDiv))
NewFDiv = Builder.CreateCall(Decl, { Num, Den });
}
if (NewFDiv) {
FDiv.replaceAllUsesWith(NewFDiv);
NewFDiv->takeName(&FDiv);
FDiv.eraseFromParent();
}
return true;
}
static bool hasUnsafeFPMath(const Function &F) {
Attribute Attr = F.getFnAttribute("unsafe-fp-math");
return Attr.getValueAsString() == "true";
}
bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
Mod = &M;
return false;
}
@ -63,10 +163,21 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
if (!TM || skipFunction(F))
return false;
ST = &TM->getSubtarget<SISubtarget>(F);
DA = &getAnalysis<DivergenceAnalysis>();
visit(F);
HasUnsafeFPMath = hasUnsafeFPMath(F);
return true;
bool MadeChange = false;
for (BasicBlock &BB : F) {
BasicBlock::iterator Next;
for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; I = Next) {
Next = std::next(I);
MadeChange |= visit(*I);
}
}
return MadeChange;
}
INITIALIZE_TM_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
@ -77,6 +188,6 @@ INITIALIZE_TM_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE,
char AMDGPUCodeGenPrepare::ID = 0;
FunctionPass *llvm::createAMDGPUCodeGenPreparePass(const TargetMachine *TM) {
FunctionPass *llvm::createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM) {
return new AMDGPUCodeGenPrepare(TM);
}

View File

@ -420,9 +420,10 @@ int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
int FP32_NEG_ONE = 0xbf800000;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
int FP64_NEG_ONE = 0xbff0000000000000;
}
def CONST : Constants;

View File

@ -29,16 +29,39 @@ static const char *const IntrinsicNameTable[] = {
#undef GET_INTRINSIC_NAME_TABLE
};
std::string AMDGPUIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
unsigned numTys) const {
if (IntrID < Intrinsic::num_intrinsics) {
return nullptr;
}
namespace {
#define GET_INTRINSIC_ATTRIBUTES
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_ATTRIBUTES
}
StringRef AMDGPUIntrinsicInfo::getName(unsigned IntrID,
ArrayRef<Type *> Tys) const {
if (IntrID < Intrinsic::num_intrinsics)
return StringRef();
assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics &&
"Invalid intrinsic ID");
std::string Result(IntrinsicNameTable[IntrID - Intrinsic::num_intrinsics]);
return Result;
return IntrinsicNameTable[IntrID - Intrinsic::num_intrinsics];
}
std::string AMDGPUIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
unsigned NumTys) const {
return getName(IntrID, makeArrayRef(Tys, NumTys)).str();
}
FunctionType *AMDGPUIntrinsicInfo::getType(LLVMContext &Context, unsigned ID,
ArrayRef<Type*> Tys) const {
// FIXME: Re-use Intrinsic::getType machinery
switch (ID) {
case AMDGPUIntrinsic::amdgcn_fdiv_fast: {
Type *F32Ty = Type::getFloatTy(Context);
return FunctionType::get(F32Ty, { F32Ty, F32Ty }, false);
}
default:
llvm_unreachable("unhandled intrinsic");
}
}
unsigned AMDGPUIntrinsicInfo::lookupName(const char *NameData,
@ -69,7 +92,19 @@ bool AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const {
}
Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
Type **Tys,
unsigned numTys) const {
llvm_unreachable("Not implemented");
ArrayRef<Type *> Tys) const {
FunctionType *FTy = getType(M->getContext(), IntrID, Tys);
Function *F
= cast<Function>(M->getOrInsertFunction(getName(IntrID, Tys), FTy));
AttributeSet AS = getAttributes(M->getContext(),
static_cast<AMDGPUIntrinsic::ID>(IntrID));
F->setAttributes(AS);
return F;
}
Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
Type **Tys,
unsigned NumTys) const {
return getDeclaration(M, IntrID, makeArrayRef(Tys, NumTys));
}

View File

@ -34,13 +34,23 @@ enum ID {
class AMDGPUIntrinsicInfo final : public TargetIntrinsicInfo {
public:
AMDGPUIntrinsicInfo();
StringRef getName(unsigned IntrId, ArrayRef<Type *> Tys = None) const;
std::string getName(unsigned IntrId, Type **Tys = nullptr,
unsigned numTys = 0) const override;
unsigned NumTys = 0) const override;
unsigned lookupName(const char *Name, unsigned Len) const override;
bool isOverloaded(unsigned IID) const override;
Function *getDeclaration(Module *M, unsigned ID,
Type **Tys = nullptr,
unsigned numTys = 0) const override;
unsigned NumTys = 0) const override;
Function *getDeclaration(Module *M, unsigned ID,
ArrayRef<Type *> = None) const;
FunctionType *getType(LLVMContext &Context, unsigned ID,
ArrayRef<Type*> Tys = None) const;
};
} // end namespace llvm

View File

@ -348,9 +348,6 @@ static VectorType *arrayTypeToVecType(Type *ArrayTy) {
static Value *
calculateVectorIndex(Value *Ptr,
const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
if (isa<AllocaInst>(Ptr))
return Constant::getNullValue(Type::getInt32Ty(Ptr->getContext()));
GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);
auto I = GEPIdx.find(GEP);
@ -360,11 +357,11 @@ calculateVectorIndex(Value *Ptr,
static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
// FIXME we only support simple cases
if (GEP->getNumOperands() != 3)
return NULL;
return nullptr;
ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
if (!I0 || !I0->isZero())
return NULL;
return nullptr;
return GEP->getOperand(2);
}
@ -398,7 +395,8 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
// are just being conservative for now.
if (!AllocaTy ||
AllocaTy->getElementType()->isVectorTy() ||
AllocaTy->getNumElements() > 4) {
AllocaTy->getNumElements() > 4 ||
AllocaTy->getNumElements() < 2) {
DEBUG(dbgs() << " Cannot convert type to vector\n");
return false;
}
@ -443,9 +441,11 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
IRBuilder<> Builder(Inst);
switch (Inst->getOpcode()) {
case Instruction::Load: {
Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Value *Ptr = Inst->getOperand(0);
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
Value *VecValue = Builder.CreateLoad(BitCast);
Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
Inst->replaceAllUsesWith(ExtractElement);
@ -453,9 +453,11 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
break;
}
case Instruction::Store: {
Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Value *Ptr = Inst->getOperand(1);
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
Value *VecValue = Builder.CreateLoad(BitCast);
Value *NewVecValue = Builder.CreateInsertElement(VecValue,
Inst->getOperand(0),
@ -469,7 +471,6 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
break;
default:
Inst->dump();
llvm_unreachable("Inconsistency in instructions promotable to vector");
}
}
@ -477,11 +478,6 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
}
static bool isCallPromotable(CallInst *CI) {
// TODO: We might be able to handle some cases where the callee is a
// constantexpr bitcast of a function.
if (!CI->getCalledFunction())
return false;
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (!II)
return false;
@ -773,28 +769,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
continue;
}
IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
if (!Intr) {
// FIXME: What is this for? It doesn't make sense to promote arbitrary
// function calls. If the call is to a defined function that can also be
// promoted, we should be able to do this once that function is also
// rewritten.
std::vector<Type*> ArgTypes;
for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
ArgIdx != ArgEnd; ++ArgIdx) {
ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
}
Function *F = Call->getCalledFunction();
FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
F->isVarArg());
Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
NewType, F->getAttributes());
Function *NewF = cast<Function>(C);
Call->setCalledFunction(NewF);
continue;
}
IntrinsicInst *Intr = cast<IntrinsicInst>(Call);
Builder.SetInsertPoint(Intr);
switch (Intr->getIntrinsicID()) {
case Intrinsic::lifetime_start:

View File

@ -309,6 +309,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override;
void addIRPasses() override;
bool addPreISel() override;
void addMachineSSAOptimization() override;
bool addInstSelector() override;
@ -499,6 +500,13 @@ void GCNPassConfig::addMachineSSAOptimization() {
addPass(&DeadMachineInstructionElimID);
}
void GCNPassConfig::addIRPasses() {
// TODO: May want to move later or split into an early and late one.
addPass(createAMDGPUCodeGenPreparePass(&getGCNTargetMachine()));
AMDGPUPassConfig::addIRPasses();
}
bool GCNPassConfig::addInstSelector() {
AMDGPUPassConfig::addInstSelector();
addPass(createSILowerI1CopiesPass());

View File

@ -122,6 +122,7 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
@ -832,13 +833,18 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
return;
case ISD::FP_TO_UINT:
if (N->getValueType(0) == MVT::i1) {
Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
return;
}
// Fall-through. Since we don't care about out of bounds values
// we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
// considers some extra cases which are not necessary here.
case ISD::FP_TO_SINT: {
if (N->getValueType(0) == MVT::i1) {
Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
return;
}
SDValue Result;
if (expandFP_TO_SINT(N, Result, DAG))
Results.push_back(Result);
@ -1052,15 +1058,24 @@ SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getNode(
ISD::SETCC,
DL,
MVT::i1,
Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
DAG.getCondCode(ISD::SETNE)
);
Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
DAG.getCondCode(ISD::SETEQ));
}
SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getNode(
ISD::SETCC,
DL,
MVT::i1,
Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
DAG.getCondCode(ISD::SETEQ));
}
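Both new lowerings reduce the conversion to a single floating-point equality. A scalar model of what they compute, assuming (as the i1 round-trip guarantees) that the source is exactly 0.0 or the extension of a set bit:
// Sketch only: uint_to_fp(i1 true) == 1.0 and sint_to_fp(i1 true) == -1.0,
// so comparing against those constants recovers the original bit.
bool fpToUintI1(float X) { return X == 1.0f; }  // was SETNE against 0.0
bool fpToSintI1(float X) { return X == -1.0f; }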
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,

View File

@ -72,7 +72,8 @@ class R600TargetLowering final : public AMDGPUTargetLowering {
SDValue lowerPrivateTruncStore(StoreSDNode *Store, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPrivateExtLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;

View File

@ -41,7 +41,8 @@ enum {
WQM = 1 << 22,
VGPRSpill = 1 << 23,
VOPAsmPrefer32Bit = 1 << 24,
Gather4 = 1 << 25
Gather4 = 1 << 25,
DisableWQM = 1 << 26
};
}

View File

@ -1134,9 +1134,9 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MachineFunction *MF = BB->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
DebugLoc DL = MI.getDebugLoc();
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOVK_I32))
.addOperand(MI.getOperand(0))
.addImm(MFI->LDSSize);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
.addOperand(MI.getOperand(0))
.addImm(MFI->LDSSize);
MI.eraseFromParent();
return BB;
}
@ -1792,6 +1792,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
Op->getVTList(), Ops, VT, MMO);
}
case AMDGPUIntrinsic::amdgcn_fdiv_fast: {
return lowerFDIV_FAST(Op, DAG);
}
case AMDGPUIntrinsic::SI_vs_load_input:
return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
Op.getOperand(1),
@ -2098,7 +2101,8 @@ SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// Catch division cases where we can use shortcuts with rcp and rsq
// instructions.
SDValue SITargetLowering::LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const {
SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
@ -2139,47 +2143,48 @@ SDValue SITargetLowering::LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
// Faster 2.5 ULP division that does not support denormals.
SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
const APFloat K0Val(BitsToFloat(0x6f800000));
const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32);
const APFloat K1Val(BitsToFloat(0x2f800000));
const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32);
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32);
SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT);
SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
// TODO: Should this propagate fast-math-flags?
r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
// rcp does not support denormals.
SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0);
return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
}
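The two magic constants are 2^96 (0x6f800000) and 2^-32 (0x2f800000): very large divisors are pre-scaled so the reciprocal never lands in the denormal range, and the quotient is re-scaled afterwards. A scalar sketch of the DAG sequence built above, where rcp stands in for the hardware reciprocal (assumed here, matching AMDGPUISD::RCP):
#include <cmath>
// Sketch of the lowering's arithmetic, not the backend code itself.
float fdivFastModel(float LHS, float RHS, float (*rcp)(float)) {
  const float K0 = 0x1.0p96f;                    // 0x6f800000
  const float K1 = 0x1.0p-32f;                   // 0x2f800000
  float Scale = std::fabs(RHS) > K0 ? K1 : 1.0f; // r2/r3: pick the scale
  float R = rcp(RHS * Scale);                    // r0: ~1/(RHS*Scale)
  return Scale * (LHS * R);                      // undo the pre-scale
}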
SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
if (SDValue FastLowered = LowerFastFDIV(Op, DAG))
if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
return FastLowered;
SDLoc SL(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
// faster 2.5 ulp fdiv when using -amdgpu-fast-fdiv flag
if (EnableAMDGPUFastFDIV) {
// This does not support denormals.
SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
const APFloat K0Val(BitsToFloat(0x6f800000));
const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32);
const APFloat K1Val(BitsToFloat(0x2f800000));
const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32);
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32);
SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT);
SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
// TODO: Should this propagate fast-math-flags?
r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
// rcp does not support denormals.
SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0);
return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
}
// Generates more precise fpdiv32.
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
SDVTList ScaleVT = DAG.getVTList(MVT::f32, MVT::i1);
@ -2209,7 +2214,7 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
if (DAG.getTarget().Options.UnsafeFPMath)
return LowerFastFDIV(Op, DAG);
return lowerFastUnsafeFDIV(Op, DAG);
SDLoc SL(Op);
SDValue X = Op.getOperand(0);

View File

@ -36,7 +36,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFastUnsafeFDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;

View File

@ -41,6 +41,8 @@ class InstSI <dag outs, dag ins, string asm = "",
field bits<1> DS = 0;
field bits<1> MIMG = 0;
field bits<1> FLAT = 0;
// Whether WQM _must_ be enabled for this instruction.
field bits<1> WQM = 0;
field bits<1> VGPRSpill = 0;
@ -50,6 +52,9 @@ class InstSI <dag outs, dag ins, string asm = "",
field bits<1> Gather4 = 0;
// Whether WQM _must_ be disabled for this instruction.
field bits<1> DisableWQM = 0;
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = VM_CNT;
let TSFlags{1} = EXP_CNT;
@ -81,6 +86,7 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{23} = VGPRSpill;
let TSFlags{24} = VOPAsmPrefer32Bit;
let TSFlags{25} = Gather4;
let TSFlags{26} = DisableWQM;
let SchedRW = [Write32Bit];

View File

@ -738,7 +738,8 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
MachineBasicBlock::iterator Insert = Entry.front();
DebugLoc DL = Insert->getDebugLoc();
TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass,
*MF);
if (TIDReg == AMDGPU::NoRegister)
return TIDReg;

View File

@ -340,6 +340,14 @@ class SIInstrInfo final : public AMDGPUInstrInfo {
return get(Opcode).TSFlags & SIInstrFlags::WQM;
}
static bool isDisableWQM(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
}
bool isDisableWQM(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
}
static bool isVGPRSpill(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill;
}

View File

@ -2949,6 +2949,10 @@ multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm,
def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
MUBUFAddr64Table <0>;
let DisableWQM = 1 in {
def "_exact" : MUBUF_Pseudo <opName, outs, ins, []>;
}
let addr64 = 0, isCodeGenOnly = 0 in {
def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
}
@ -3019,7 +3023,8 @@ multiclass MUBUFAtomicOther_m <mubuf op, string opName, dag outs, dag ins,
multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
ValueType vt, SDPatternOperator atomic> {
let mayStore = 1, mayLoad = 1, hasPostISelHook = 1, hasSideEffects = 1 in {
let mayStore = 1, mayLoad = 1, hasPostISelHook = 1, hasSideEffects = 1,
DisableWQM = 1 in {
// No return variants
let glc = 0, AsmMatchConverter = "cvtMubufAtomic" in {
@ -3423,6 +3428,7 @@ class MIMG_Store_Helper <bits<7> op, string asm,
let mayStore = 1;
let hasSideEffects = 1;
let hasPostISelHook = 0;
let DisableWQM = 1;
}
multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm,
@ -3454,6 +3460,7 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
let mayStore = 1;
let hasSideEffects = 1;
let hasPostISelHook = 0;
let DisableWQM = 1;
let Constraints = "$vdst = $vdata";
let AsmMatchConverter = "cvtMIMGAtomic";
}

View File

@ -2200,7 +2200,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(name vt:$vdata, v4i32:$rsrc, 0,
(MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
imm:$glc, imm:$slc),
(!cast<MUBUF>(opcode # _OFFSET) $vdata, $rsrc, $soffset, (as_i16imm $offset),
(!cast<MUBUF>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
(as_i1imm $glc), (as_i1imm $slc), 0)
>;
@ -2208,7 +2208,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(name vt:$vdata, v4i32:$rsrc, i32:$vindex,
(MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
imm:$glc, imm:$slc),
(!cast<MUBUF>(opcode # _IDXEN) $vdata, $vindex, $rsrc, $soffset,
(!cast<MUBUF>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (as_i1imm $glc),
(as_i1imm $slc), 0)
>;
@ -2217,7 +2217,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(name vt:$vdata, v4i32:$rsrc, 0,
(MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
imm:$glc, imm:$slc),
(!cast<MUBUF>(opcode # _OFFEN) $vdata, $voffset, $rsrc, $soffset,
(!cast<MUBUF>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (as_i1imm $glc),
(as_i1imm $slc), 0)
>;
@ -2226,7 +2226,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(name vt:$vdata, v4i32:$rsrc, i32:$vindex,
(MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
imm:$glc, imm:$slc),
(!cast<MUBUF>(opcode # _BOTHEN)
(!cast<MUBUF>(opcode # _BOTHEN_exact)
$vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
@ -3391,6 +3391,16 @@ def : Pat <
(V_CNDMASK_B32_e64 0, -1, $src), sub1)
>;
class FPToI1Pat<Instruction Inst, int KOne, ValueType vt, SDPatternOperator fp_to_int> : Pat <
(i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))),
(i1 (Inst 0, KOne, $src0_modifiers, $src0, DSTCLAMP.NONE, DSTOMOD.NONE))
>;
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, f32, fp_to_uint>;
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, f32, fp_to_sint>;
def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_ONE, f64, fp_to_uint>;
def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_NEG_ONE, f64, fp_to_sint>;
// If we need to perform a logical operation on i1 values, we need to
// use vector comparisons since there is only one SCC register. Vector
// comparisions still write to a pair of SGPRs, so treat these as

View File

@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
// SI Intrinsic Definitions
// Backend internal SI Intrinsic Definitions. User code should not
// directly use these.
//
//===----------------------------------------------------------------------===//
@ -177,6 +178,12 @@ let TargetPrefix = "SI", isTarget = 1 in {
} // End TargetPrefix = "SI", isTarget = 1
let TargetPrefix = "amdgcn", isTarget = 1 in {
// Emit 2.5 ulp, no-denormal division. Should only be inserted by a
// pass based on !fpmath metadata.
def int_amdgcn_fdiv_fast : Intrinsic<
[llvm_float_ty], [llvm_float_ty], [IntrNoMem]
>;
/* Control flow Intrinsics */
def int_amdgcn_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], []>;

View File

@ -203,7 +203,8 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
Spill.Lane = Lane;
if (!LaneVGPRs.count(LaneVGPRIdx)) {
unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass,
*MF);
if (LaneVGPR == AMDGPU::NoRegister)
// We have no VGPRs left for spilling SGPRs.

View File

@ -957,10 +957,13 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
/// \brief Returns a register that is not used at any point in the function.
/// If all registers are used, then this function will return
/// AMDGPU::NoRegister.
unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const {
unsigned
SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC,
const MachineFunction &MF) const {
for (unsigned Reg : *RC)
if (!MRI.isPhysRegUsed(Reg))
if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
return Reg;
return AMDGPU::NoRegister;
}

View File

@ -185,7 +185,8 @@ struct SIRegisterInfo final : public AMDGPURegisterInfo {
unsigned getNumSGPRsAllowed(const SISubtarget &ST, unsigned WaveCount) const;
unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const;
const TargetRegisterClass *RC,
const MachineFunction &MF) const;
unsigned getSGPR32PressureSet() const { return SGPR32SetID; };
unsigned getVGPR32PressureSet() const { return VGPR32SetID; };

View File

@ -94,12 +94,15 @@ class SIWholeQuadMode : public MachineFunctionPass {
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
MachineRegisterInfo *MRI;
LiveIntervals *LIS;
DenseMap<const MachineInstr *, InstrInfo> Instructions;
DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
SmallVector<const MachineInstr *, 2> ExecExports;
SmallVector<MachineInstr *, 1> LiveMaskQueries;
void markInstruction(MachineInstr &MI, char Flag,
std::vector<WorkItem> &Worklist);
char scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist);
void propagateInstruction(MachineInstr &MI, std::vector<WorkItem> &Worklist);
void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
@ -126,6 +129,7 @@ class SIWholeQuadMode : public MachineFunctionPass {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LiveIntervals>();
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
@ -135,8 +139,11 @@ class SIWholeQuadMode : public MachineFunctionPass {
char SIWholeQuadMode::ID = 0;
INITIALIZE_PASS(SIWholeQuadMode, DEBUG_TYPE,
"SI Whole Quad Mode", false, false)
INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
false)
char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;
@ -144,6 +151,23 @@ FunctionPass *llvm::createSIWholeQuadModePass() {
return new SIWholeQuadMode;
}
void SIWholeQuadMode::markInstruction(MachineInstr &MI, char Flag,
std::vector<WorkItem> &Worklist) {
InstrInfo &II = Instructions[&MI];
assert(Flag == StateWQM || Flag == StateExact);
// Ignore if the instruction is already marked. The typical case is that we
// mark an instruction WQM multiple times, but for atomics it can happen that
// Flag is StateWQM, but Needs is already set to StateExact. In this case,
// letting the atomic run in StateExact is correct as per the relevant specs.
if (II.Needs)
return;
II.Needs = Flag;
Worklist.push_back(&MI);
}
// Scan instructions to determine which ones require an Exact execmask and
// which ones seed WQM requirements.
char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
@ -161,7 +185,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
if (TII->isWQM(Opcode) || TII->isDS(Opcode)) {
Flags = StateWQM;
} else if (MI.mayStore() && TII->usesVM_CNT(MI)) {
} else if (TII->isDisableWQM(MI)) {
Flags = StateExact;
} else {
// Handle export instructions with the exec mask valid flag set
@ -192,8 +216,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
continue;
}
Instructions[&MI].Needs = Flags;
Worklist.push_back(&MI);
markInstruction(MI, Flags, Worklist);
GlobalFlags |= Flags;
}
@ -214,9 +237,10 @@ void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,
InstrInfo II = Instructions[&MI]; // take a copy to prevent dangling references
BlockInfo &BI = Blocks[MBB];
// Control flow-type instructions that are followed by WQM computations
// must themselves be in WQM.
if ((II.OutNeeds & StateWQM) && !(II.Needs & StateWQM) && MI.isTerminator()) {
// Control flow-type instructions and stores to temporary memory that are
// followed by WQM computations must themselves be in WQM.
if ((II.OutNeeds & StateWQM) && !II.Needs &&
(MI.isTerminator() || (TII->usesVM_CNT(MI) && MI.mayStore()))) {
Instructions[&MI].Needs = StateWQM;
II.Needs = StateWQM;
}
@ -249,32 +273,35 @@ void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,
if (!Use.isReg() || !Use.isUse())
continue;
// At this point, physical registers appear as inputs or outputs
// and following them makes no sense (and would in fact be incorrect
// when the same VGPR is used as both an output and an input that leads
// to a NeedsWQM instruction).
//
// Note: VCC appears e.g. in 64-bit addition with carry - theoretically we
// have to trace this, in practice it happens for 64-bit computations like
// pointers where both dwords are followed already anyway.
if (!TargetRegisterInfo::isVirtualRegister(Use.getReg()))
continue;
unsigned Reg = Use.getReg();
for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg())) {
InstrInfo &DefII = Instructions[&DefMI];
// Obviously skip if DefMI is already flagged as NeedWQM.
//
// The instruction might also be flagged as NeedExact. This happens when
// the result of an atomic is used in a WQM computation. In this case,
// the atomic must not run for helper pixels and the WQM result is
// undefined.
if (DefII.Needs != 0)
// Handle physical registers that we need to track; this is mostly relevant
// for VCC, which can appear as the (implicit) input of a uniform branch,
// e.g. when a loop counter is stored in a VGPR.
if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
if (Reg == AMDGPU::EXEC)
continue;
DefII.Needs = StateWQM;
Worklist.push_back(&DefMI);
for (MCRegUnitIterator RegUnit(Reg, TRI); RegUnit.isValid(); ++RegUnit) {
LiveRange &LR = LIS->getRegUnit(*RegUnit);
const VNInfo *Value = LR.Query(LIS->getInstructionIndex(MI)).valueIn();
if (!Value)
continue;
// Since we're in machine SSA, we do not need to track physical
// registers across basic blocks.
if (Value->isPHIDef())
continue;
markInstruction(*LIS->getInstructionFromIndex(Value->def), StateWQM,
Worklist);
}
continue;
}
for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg()))
markInstruction(DefMI, StateWQM, Worklist);
}
}
@ -468,6 +495,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
char GlobalFlags = analyzeFunction(MF);
if (!(GlobalFlags & StateWQM)) {

View File

@ -3857,7 +3857,8 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// Try to convert two saturating conditional selects into a single SSAT
SDValue SatValue;
uint64_t SatConstant;
if (isSaturatingConditional(Op, SatValue, SatConstant))
if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
isSaturatingConditional(Op, SatValue, SatConstant))
return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
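The guard added here keeps the combine from forming ARMISD::SSAT on pre-v6 ARM or non-Thumb2 targets, where the instruction does not exist. For reference, SSAT clamps a signed value into an N-bit signed range; a scalar model of the saturation itself (a sketch, not the backend's code):
#include <cstdint>
// What SSAT #N computes: clamp V into [-2^(N-1), 2^(N-1)-1], 1 <= N <= 32.
int32_t ssatModel(int32_t V, unsigned N) {
  int32_t Hi = int32_t((1u << (N - 1)) - 1); // e.g. N == 8 -> 127
  int32_t Lo = -Hi - 1;                      //            -> -128
  return V > Hi ? Hi : (V < Lo ? Lo : V);
}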

View File

@ -3650,7 +3650,8 @@ def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
def SSAT : AI<(outs GPRnopc:$Rd),
(ins imm1_32:$sat_imm, GPRnopc:$Rn, shift_imm:$sh),
SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> {
SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []>,
Requires<[IsARM,HasV6]> {
bits<4> Rd;
bits<5> sat_imm;
bits<4> Rn;
@ -3666,7 +3667,8 @@ def SSAT : AI<(outs GPRnopc:$Rd),
def SSAT16 : AI<(outs GPRnopc:$Rd),
(ins imm1_16:$sat_imm, GPRnopc:$Rn), SatFrm,
NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn", []> {
NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn", []>,
Requires<[IsARM,HasV6]> {
bits<4> Rd;
bits<4> sat_imm;
bits<4> Rn;
@ -3679,7 +3681,8 @@ def SSAT16 : AI<(outs GPRnopc:$Rd),
def USAT : AI<(outs GPRnopc:$Rd),
(ins imm0_31:$sat_imm, GPRnopc:$Rn, shift_imm:$sh),
SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> {
SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []>,
Requires<[IsARM,HasV6]> {
bits<4> Rd;
bits<5> sat_imm;
bits<4> Rn;
@ -3695,7 +3698,8 @@ def USAT : AI<(outs GPRnopc:$Rd),
def USAT16 : AI<(outs GPRnopc:$Rd),
(ins imm0_15:$sat_imm, GPRnopc:$Rn), SatFrm,
NoItinerary, "usat16", "\t$Rd, $sat_imm, $Rn", []> {
NoItinerary, "usat16", "\t$Rd, $sat_imm, $Rn", []>,
Requires<[IsARM,HasV6]> {
bits<4> Rd;
bits<4> sat_imm;
bits<4> Rn;

View File

@ -2240,7 +2240,8 @@ class T2SatI<dag oops, dag iops, InstrItinClass itin,
def t2SSAT: T2SatI<
(outs rGPR:$Rd),
(ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> {
NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
let Inst{20} = 0;
@ -2251,7 +2252,7 @@ def t2SSAT: T2SatI<
def t2SSAT16: T2SatI<
(outs rGPR:$Rd), (ins imm1_16:$sat_imm, rGPR:$Rn), NoItinerary,
"ssat16", "\t$Rd, $sat_imm, $Rn", []>,
Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
let Inst{20} = 0;
@ -2265,7 +2266,8 @@ def t2SSAT16: T2SatI<
def t2USAT: T2SatI<
(outs rGPR:$Rd),
(ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> {
NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1110;
let Inst{20} = 0;
@ -2275,7 +2277,7 @@ def t2USAT: T2SatI<
def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn),
NoItinerary,
"usat16", "\t$Rd, $sat_imm, $Rn", []>,
Requires<[IsThumb2, HasDSP]> {
let Inst{31-22} = 0b1111001110;
let Inst{20} = 0;
let Inst{15} = 0;

View File

@ -518,6 +518,10 @@ bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
return true;
return false;
case ELF::R_MIPS_GOT_PAGE:
case ELF::R_MICROMIPS_GOT_PAGE:
case ELF::R_MIPS_GOT_OFST:
case ELF::R_MICROMIPS_GOT_OFST:
case ELF::R_MIPS_16:
case ELF::R_MIPS_32:
case ELF::R_MIPS_GPREL32:
@ -539,8 +543,6 @@ bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
case ELF::R_MIPS_SHIFT5:
case ELF::R_MIPS_SHIFT6:
case ELF::R_MIPS_GOT_DISP:
case ELF::R_MIPS_GOT_PAGE:
case ELF::R_MIPS_GOT_OFST:
case ELF::R_MIPS_GOT_HI16:
case ELF::R_MIPS_GOT_LO16:
case ELF::R_MIPS_INSERT_A:
@ -589,8 +591,6 @@ bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
case ELF::R_MICROMIPS_PC16_S1:
case ELF::R_MICROMIPS_CALL16:
case ELF::R_MICROMIPS_GOT_DISP:
case ELF::R_MICROMIPS_GOT_PAGE:
case ELF::R_MICROMIPS_GOT_OFST:
case ELF::R_MICROMIPS_GOT_HI16:
case ELF::R_MICROMIPS_GOT_LO16:
case ELF::R_MICROMIPS_SUB:

View File

@ -28,12 +28,19 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) {
PointerSize = CalleeSaveStackSlotSize = 8;
}
// FIXME: This condition isn't quite right but it's the best we can do until
// this object can identify the ABI. It will misbehave when using O32
// on a mips64*-* triple.
if ((TheTriple.getArch() == Triple::mipsel) ||
(TheTriple.getArch() == Triple::mips)) {
PrivateGlobalPrefix = "$";
PrivateLabelPrefix = "$";
}
AlignmentIsInBytes = false;
Data16bitsDirective = "\t.2byte\t";
Data32bitsDirective = "\t.4byte\t";
Data64bitsDirective = "\t.8byte\t";
PrivateGlobalPrefix = "$";
PrivateLabelPrefix = "$";
CommentString = "#";
ZeroDirective = "\t.space\t";
GPRel32Directive = "\t.gpword\t";

View File

@ -57,7 +57,10 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
else
Ret += "E";
Ret += "-m:m";
if (ABI.IsO32())
Ret += "-m:m";
else
Ret += "-m:e";
// Pointers are 32 bit on some ABIs.
if (!ABI.IsN64())

View File

@ -1187,6 +1187,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
@ -13373,6 +13381,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (SrcVT.isVector()) {
if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
return DAG.getNode(X86ISD::CVTDQ2PD, dl, VT,
@ -13380,6 +13389,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
DAG.getUNDEF(SrcVT)));
}
if (SrcVT.getVectorElementType() == MVT::i1) {
if (SrcVT == MVT::v2i1 && TLI.isTypeLegal(SrcVT))
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v2i64, Src));
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src));
@ -13694,6 +13706,15 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
MVT SVT = N0.getSimpleValueType();
SDLoc dl(Op);
if (SVT.getVectorElementType() == MVT::i1) {
if (SVT == MVT::v2i1)
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0));
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements());
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0));
}
switch (SVT.SimpleTy) {
default:
llvm_unreachable("Custom UINT_TO_FP is not supported!");

View File

@ -2661,7 +2661,8 @@ inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
unsigned Opc, bool AllowSP, unsigned &NewSrc,
bool &isKill, bool &isUndef,
MachineOperand &ImplicitOp) const {
MachineOperand &ImplicitOp,
LiveVariables *LV) const {
MachineFunction &MF = *MI.getParent()->getParent();
const TargetRegisterClass *RC;
if (AllowSP) {
@ -2715,13 +2716,17 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
// Virtual register of the wrong class, we have to create a temporary 64-bit
// vreg to feed into the LEA.
NewSrc = MF.getRegInfo().createVirtualRegister(RC);
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY))
MachineInstr *Copy = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
get(TargetOpcode::COPY))
.addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
.addOperand(Src);
// Which is obviously going to be dead after we're done with it.
isKill = true;
isUndef = false;
if (LV)
LV->replaceKillInstruction(SrcReg, MI, *Copy);
}
// We've set all the parameters without issue.
@ -2900,7 +2905,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
SrcReg, isKill, isUndef, ImplicitOp))
SrcReg, isKill, isUndef, ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB =
@ -2943,7 +2948,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
SrcReg, isKill, isUndef, ImplicitOp))
SrcReg, isKill, isUndef, ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB =
@ -2977,7 +2982,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
SrcReg, isKill, isUndef, ImplicitOp))
SrcReg, isKill, isUndef, ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
@ -3016,7 +3021,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
SrcReg, isKill, isUndef, ImplicitOp))
SrcReg, isKill, isUndef, ImplicitOp, LV))
return nullptr;
const MachineOperand &Src2 = MI.getOperand(2);
@ -3024,7 +3029,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned SrcReg2;
MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
SrcReg2, isKill2, isUndef2, ImplicitOp2))
SrcReg2, isKill2, isUndef2, ImplicitOp2, LV))
return nullptr;
MachineInstrBuilder MIB =
@ -3087,7 +3092,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
SrcReg, isKill, isUndef, ImplicitOp))
SrcReg, isKill, isUndef, ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))

View File

@ -230,7 +230,7 @@ class X86InstrInfo final : public X86GenInstrInfo {
bool classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
unsigned LEAOpcode, bool AllowSP, unsigned &NewSrc,
bool &isKill, bool &isUndef,
MachineOperand &ImplicitOp) const;
MachineOperand &ImplicitOp, LiveVariables *LV) const;
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target

View File

@ -1820,7 +1820,7 @@ def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
Sched<[WriteCvtF2F]>;
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg,
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
@ -1836,7 +1836,7 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>,
Sched<[WriteCvtF2F]>;
def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
@ -2009,24 +2009,35 @@ def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
// SSE2 packed instructions with XS prefix
def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
[(set VR128:$dst,
(int_x86_sse2_cvttps2dq VR128:$src))],
IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
[(set VR128:$dst, (int_x86_sse2_cvttps2dq
(loadv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
[(set VR256:$dst,
(int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
[(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
(loadv8f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
Sched<[WriteCvtF2ILd]>;
def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
[(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
[(set VR128:$dst,
(int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
let Predicates = [HasAVX] in {
def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
@ -2096,10 +2107,14 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 (loadv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;

View File

@ -332,6 +332,7 @@ struct ArgumentUsesTracker : public CaptureTracker {
namespace llvm {
template <> struct GraphTraits<ArgumentGraphNode *> {
typedef ArgumentGraphNode NodeType;
typedef ArgumentGraphNode *NodeRef;
typedef SmallVectorImpl<ArgumentGraphNode *>::iterator ChildIteratorType;
static inline NodeType *getEntryNode(NodeType *A) { return A; }

View File

@ -44,6 +44,7 @@
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
@ -779,7 +780,8 @@ static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
// Instructions could multiply use V.
while (UI != E && *UI == I)
++UI;
I->eraseFromParent();
if (isInstructionTriviallyDead(I, TLI))
I->eraseFromParent();
}
}
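The same guard recurs throughout this commit (InstCombine, ConstantPropagation, JumpThreading, LICM below): after a fold, uses are rewritten unconditionally, but the folded instruction is erased only when it is trivially dead, since having a foldable result no longer implies the instruction is side-effect free. The recurring shape, as a sketch:
I->replaceAllUsesWith(C);               // C is the folded constant
if (isInstructionTriviallyDead(I, TLI))
  I->eraseFromParent();                 // keep I if it still has side effects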

View File

@ -134,6 +134,10 @@ static cl::opt<int> PreInlineThreshold(
cl::desc("Control the amount of inlining in pre-instrumentation inliner "
"(default = 75)"));
static cl::opt<bool> EnableGVNHoist(
"enable-gvn-hoist", cl::init(false), cl::Hidden,
cl::desc("Enable the experimental GVN Hoisting pass"));
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@ -232,7 +236,8 @@ void PassManagerBuilder::populateFunctionPassManager(
FPM.add(createCFGSimplificationPass());
FPM.add(createSROAPass());
FPM.add(createEarlyCSEPass());
FPM.add(createGVNHoistPass());
if (EnableGVNHoist)
FPM.add(createGVNHoistPass());
FPM.add(createLowerExpectIntrinsicPass());
}
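Since the hoisting pass is now opt-in, it presumably has to be requested explicitly, e.g. by passing -enable-gvn-hoist to opt alongside an -O pipeline; the flag name is taken from the cl::opt registered above.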

View File

@ -553,8 +553,11 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
}
}
// FIXME: This code is nearly duplicated in InstSimplify. Using/refactoring
// decomposeBitTestICmp() might help.
{
unsigned BitWidth = DL.getTypeSizeInBits(TrueVal->getType());
unsigned BitWidth =
DL.getTypeSizeInBits(TrueVal->getType()->getScalarType());
APInt MinSignedValue = APInt::getSignBit(BitWidth);
Value *X;
const APInt *Y, *C;

View File

@ -2830,7 +2830,8 @@ bool InstCombiner::run() {
// Add operands to the worklist.
replaceInstUsesWith(*I, C);
++NumConstProp;
eraseInstFromFunction(*I);
if (isInstructionTriviallyDead(I, TLI))
eraseInstFromFunction(*I);
MadeIRChange = true;
continue;
}
@ -2851,7 +2852,8 @@ bool InstCombiner::run() {
// Add operands to the worklist.
replaceInstUsesWith(*I, C);
++NumConstProp;
eraseInstFromFunction(*I);
if (isInstructionTriviallyDead(I, TLI))
eraseInstFromFunction(*I);
MadeIRChange = true;
continue;
}
@ -3007,7 +3009,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
<< *Inst << '\n');
Inst->replaceAllUsesWith(C);
++NumConstProp;
Inst->eraseFromParent();
if (isInstructionTriviallyDead(Inst, TLI))
Inst->eraseFromParent();
continue;
}

View File

@ -272,8 +272,9 @@ static bool shouldInstrumentReadWriteFromAddress(Value *Addr) {
return false;
}
// Check if the global is in a GCOV counter array.
if (GV->getName().startswith("__llvm_gcov_ctr"))
// Check if the global is private gcov data.
if (GV->getName().startswith("__llvm_gcov") ||
GV->getName().startswith("__llvm_gcda"))
return false;
}

View File

@ -19,6 +19,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constant.h"
@ -90,11 +91,13 @@ bool ConstantPropagation::runOnFunction(Function &F) {
// Remove the dead instruction.
WorkList.erase(I);
I->eraseFromParent();
if (isInstructionTriviallyDead(I, TLI)) {
I->eraseFromParent();
++NumInstKilled;
}
// We made a change to the function...
Changed = true;
++NumInstKilled;
}
}
return Changed;

View File

@ -582,6 +582,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// its simpler value.
if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) {
DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
bool Killed = false;
if (!Inst->use_empty()) {
Inst->replaceAllUsesWith(V);
Changed = true;
@ -589,11 +590,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (isInstructionTriviallyDead(Inst, &TLI)) {
Inst->eraseFromParent();
Changed = true;
Killed = true;
}
if (Changed) {
if (Changed)
++NumSimplify;
if (Killed)
continue;
}
}
// If this is a simple instruction that we can value number, process it.

View File

@ -815,6 +815,14 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
return;
// Check that `Cast` actually extends the induction variable (we rely on this
// later). This takes care of cases where `Cast` is extending a truncation of
// the narrow induction variable, and thus can end up being narrower than the
// "narrow" induction variable.
uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
if (NarrowIVWidth >= Width)
return;
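// For instance (hypothetical values): with an i64 induction variable,
// Cast = sext i8 (trunc i64 IV) to i32 has Width == 32 while
// NarrowIVWidth == 64, so the check above bails out.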
// Cast is either an sext or zext up to this point.
// We should not widen an indvar if arithmetics on the wider indvar are more
// expensive than those on the narrower indvar. We check only the cost of ADD

View File

@ -758,7 +758,8 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
ConstantFoldInstruction(I, BB->getModule()->getDataLayout(), TLI);
if (SimpleVal) {
I->replaceAllUsesWith(SimpleVal);
I->eraseFromParent();
if (isInstructionTriviallyDead(I, TLI))
I->eraseFromParent();
Condition = SimpleVal;
}
}

View File

@ -377,9 +377,11 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
&I, I.getModule()->getDataLayout(), TLI)) {
DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
CurAST->copyValue(&I, C);
CurAST->deleteValue(&I);
I.replaceAllUsesWith(C);
I.eraseFromParent();
if (isInstructionTriviallyDead(&I, TLI)) {
CurAST->deleteValue(&I);
I.eraseFromParent();
}
continue;
}

View File

@ -4442,6 +4442,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Determine an input position which will be dominated by the operands and
// which will dominate the result.
IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
Rewriter.setInsertPoint(&*IP);
// Inform the Rewriter if we have a post-increment use, so that it can
// perform an advantageous expansion.
@ -4473,7 +4474,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
LF.UserInst, LF.OperandValToReplace,
Loops, SE, DT);
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, &*IP)));
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
}
// Expand the ScaledReg portion.
@ -4491,14 +4492,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Expand ScaleReg as if it was part of the base regs.
if (F.Scale == 1)
Ops.push_back(
SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP)));
SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)));
else {
// An interesting way of "folding" with an icmp is to use a negated
// scale, which we'll implement by inserting it into the other operand
// of the icmp.
assert(F.Scale == -1 &&
"The only scale supported by ICmpZero uses is -1!");
ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, &*IP);
ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
}
} else {
// Otherwise just expand the scaled register and an explicit scale,
@ -4508,11 +4509,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Unless the addressing mode will not be folded.
if (!Ops.empty() && LU.Kind == LSRUse::Address &&
isAMCompletelyFolded(TTI, LU, F)) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP));
ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
if (F.Scale != 1)
ScaledS =
SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
@ -4524,7 +4525,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
if (F.BaseGV) {
// Flush the operand list to suppress SCEVExpander hoisting.
if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
@ -4534,7 +4535,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Flush the operand list to suppress SCEVExpander hoisting of both folded and
// unfolded offsets. LSR assumes they both live next to their uses.
if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
@ -4570,7 +4571,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
const SCEV *FullS = Ops.empty() ?
SE.getConstant(IntTy, 0) :
SE.getAddExpr(Ops);
Value *FullV = Rewriter.expandCodeFor(FullS, Ty, &*IP);
Value *FullV = Rewriter.expandCodeFor(FullS, Ty);
// We're done expanding now, so reset the rewriter.
Rewriter.clearPostInc();

View File

@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
@ -552,9 +553,39 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// two PHINodes, the iteration over the old PHIs remains valid, and the
// mapping will just map us to the new node (which may not even be a PHI
// node).
const DataLayout &DL = NewFunc->getParent()->getDataLayout();
SmallSetVector<const Value *, 8> Worklist;
for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]]))
recursivelySimplifyInstruction(PN);
if (isa<PHINode>(VMap[PHIToResolve[Idx]]))
Worklist.insert(PHIToResolve[Idx]);
// Note that we must test the size on each iteration, the worklist can grow.
for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
const Value *OrigV = Worklist[Idx];
auto *I = dyn_cast_or_null<Instruction>(VMap.lookup(OrigV));
if (!I)
continue;
// See if this instruction simplifies.
Value *SimpleV = SimplifyInstruction(I, DL);
if (!SimpleV)
continue;
// Stash away all the uses of the old instruction so we can check them for
// recursive simplifications after a RAUW. This is cheaper than checking all
// uses of the replacement value on the recursive step in most cases.
for (const User *U : OrigV->users())
Worklist.insert(cast<Instruction>(U));
// Replace the instruction with its simplified value.
I->replaceAllUsesWith(SimpleV);
// If the original instruction had no side effects, remove it.
if (isInstructionTriviallyDead(I))
I->eraseFromParent();
else
VMap[OrigV] = I;
}
// Now that the inlined function body has been fully constructed, go through
// and zap unconditional fall-through branches. This happens all the time when

View File

@ -1294,6 +1294,13 @@ updateInlinedAtInfo(const DebugLoc &DL, DILocation *InlinedAtNode,
return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), Last);
}
/// Return the result of AI->isStaticAlloca() if AI were moved to the entry
/// block. Allocas used in inalloca calls and allocas of dynamic array size
/// cannot be static.
static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) {
return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca();
}
/// Update inlined instructions' line numbers to encode the location where
/// these instructions are inlined.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
@ -1328,7 +1335,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
// Don't update static allocas, as they may get moved later.
if (auto *AI = dyn_cast<AllocaInst>(BI))
if (isa<Constant>(AI->getArraySize()))
if (allocaWouldBeStaticInEntry(AI))
continue;
BI->setDebugLoc(TheCallDL);
@ -1626,7 +1633,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
continue;
}
if (!isa<Constant>(AI->getArraySize()))
if (!allocaWouldBeStaticInEntry(AI))
continue;
// Keep track of the static allocas that we inline into the caller.
@ -1635,7 +1642,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Scan for the block of allocas that we can move over, and move them
// all at once.
while (isa<AllocaInst>(I) &&
isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) {
IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
++I;
}

View File

@ -64,6 +64,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
DominatorTree &DT, LoopInfo &LI) {
SmallVector<Use *, 16> UsesToRewrite;
SmallVector<BasicBlock *, 8> ExitBlocks;
SmallSetVector<PHINode *, 16> PHIsToRemove;
PredIteratorCache PredCache;
bool Changed = false;
@ -115,7 +116,8 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
SmallVector<PHINode *, 16> AddedPHIs;
SmallVector<PHINode *, 8> PostProcessPHIs;
SSAUpdater SSAUpdate;
SmallVector<PHINode *, 4> InsertedPHIs;
SSAUpdater SSAUpdate(&InsertedPHIs);
SSAUpdate.Initialize(I->getType(), I->getName());
// Insert the LCSSA phi's into all of the exit blocks dominated by the
@ -184,6 +186,14 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
// Otherwise, do full PHI insertion.
SSAUpdate.RewriteUse(*UseToRewrite);
// SSAUpdater might have inserted phi-nodes inside other loops. We'll need
// to post-process them to keep LCSSA form.
for (PHINode *InsertedPN : InsertedPHIs) {
if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent()))
if (!L->contains(OtherLoop))
PostProcessPHIs.push_back(InsertedPN);
}
}
// Post process PHI instructions that were inserted into another disjoint
@ -196,13 +206,19 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
Worklist.push_back(PostProcessPN);
}
// Remove PHI nodes that did not have any uses rewritten.
// Keep track of PHI nodes that we want to remove because they did not have
// any uses rewritten.
for (PHINode *PN : AddedPHIs)
if (PN->use_empty())
PN->eraseFromParent();
PHIsToRemove.insert(PN);
Changed = true;
}
// Remove PHI nodes that did not have any uses rewritten.
for (PHINode *PN : PHIsToRemove) {
assert(PN->use_empty() && "Trying to remove a phi with uses.");
PN->eraseFromParent();
}
return Changed;
}

View File

@ -327,6 +327,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
else
NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I));
SmallVector<BasicBlock *, 8> OuterLoopBlocks;
OuterLoopBlocks.push_back(NewBB);
// Now that we know which blocks are in L and which need to be moved to
// OuterLoop, move any blocks that need it.
for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
@ -334,12 +336,53 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
if (!BlocksInL.count(BB)) {
// Move this block to the parent, updating the exit blocks sets
L->removeBlockFromLoop(BB);
if ((*LI)[BB] == L)
if ((*LI)[BB] == L) {
LI->changeLoopFor(BB, NewOuter);
OuterLoopBlocks.push_back(BB);
}
--i;
}
}
// Split edges to exit blocks from the inner loop, if they emerged in the
// process of separating the outer one.
SmallVector<BasicBlock *, 8> ExitBlocks;
L->getExitBlocks(ExitBlocks);
SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
ExitBlocks.end());
for (BasicBlock *ExitBlock : ExitBlockSet) {
if (any_of(predecessors(ExitBlock),
[L](BasicBlock *BB) { return !L->contains(BB); })) {
rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA);
}
}
if (PreserveLCSSA) {
// Fix LCSSA form for L. Some values, which previously were only used inside
// L, can now be used in NewOuter loop. We need to insert phi-nodes for them
// in corresponding exit blocks.
// Go through all instructions in OuterLoopBlocks and check if they are
// using operands from the inner loop. In this case we'll need to fix LCSSA
// for these instructions.
SmallSetVector<Instruction *, 8> WorklistSet;
for (BasicBlock *OuterBB : OuterLoopBlocks) {
for (Instruction &I : *OuterBB) {
for (Value *Op : I.operands()) {
Instruction *OpI = dyn_cast<Instruction>(Op);
if (!OpI || !L->contains(OpI))
continue;
WorklistSet.insert(OpI);
}
}
}
SmallVector<Instruction *, 8> Worklist(WorklistSet.begin(),
WorklistSet.end());
formLCSSAForInstructions(Worklist, *DT, *LI);
assert(NewOuter->isRecursivelyLCSSAForm(*DT) &&
"LCSSA is broken after separating nested loops!");
}
return NewOuter;
}
@ -541,17 +584,12 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
ExitBlocks.end());
for (BasicBlock *ExitBlock : ExitBlockSet) {
for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
PI != PE; ++PI)
// Must be exactly this loop: no subloops, parent loops, or non-loop preds
// allowed.
if (!L->contains(*PI)) {
if (rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA)) {
++NumInserted;
Changed = true;
}
break;
}
if (any_of(predecessors(ExitBlock),
[L](BasicBlock *BB) { return !L->contains(BB); })) {
rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA);
++NumInserted;
Changed = true;
}
}
// If the header has more than two predecessors at this point (from the

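The predicate used here is the same one introduced in the previous file; as a sketch, it could be factored into a small helper (the helper name is hypothetical, not part of this change):

// True iff some predecessor of ExitBlock lies outside L, i.e. the exit is
// not yet dedicated to this loop and still needs rewriting.
static bool hasNonLoopPredecessor(const Loop *L, BasicBlock *ExitBlock) {
  return any_of(predecessors(ExitBlock),
                [L](BasicBlock *BB) { return !L->contains(BB); });
}
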
View File

@ -50,6 +50,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@ -220,6 +221,81 @@ class LoopVectorizationLegality;
class LoopVectorizationCostModel;
class LoopVectorizationRequirements;
// A traits type that is intended to be used in graph algorithms. The graph it
// models starts at the loop header and covers the BasicBlocks in the loop
// body, but not the loop header itself. Since the header is skipped, its
// incoming back edges are excluded.
struct LoopBodyTraits {
using NodeRef = std::pair<const Loop *, BasicBlock *>;
// This wraps a const Loop * into the iterator, so we know which edges to
// filter out.
class WrappedSuccIterator
: public iterator_adaptor_base<
WrappedSuccIterator, succ_iterator,
typename std::iterator_traits<succ_iterator>::iterator_category,
NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
using BaseT = iterator_adaptor_base<
WrappedSuccIterator, succ_iterator,
typename std::iterator_traits<succ_iterator>::iterator_category,
NodeRef, std::ptrdiff_t, NodeRef *, NodeRef>;
const Loop *L;
public:
WrappedSuccIterator(succ_iterator Begin, const Loop *L)
: BaseT(Begin), L(L) {}
NodeRef operator*() const { return {L, *I}; }
};
struct LoopBodyFilter {
bool operator()(NodeRef N) const {
const Loop *L = N.first;
return N.second != L->getHeader() && L->contains(N.second);
}
};
using ChildIteratorType =
filter_iterator<WrappedSuccIterator, LoopBodyFilter>;
static NodeRef getEntryNode(const Loop &G) { return {&G, G.getHeader()}; }
static ChildIteratorType child_begin(NodeRef Node) {
return make_filter_range(make_range<WrappedSuccIterator>(
{succ_begin(Node.second), Node.first},
{succ_end(Node.second), Node.first}),
LoopBodyFilter{})
.begin();
}
static ChildIteratorType child_end(NodeRef Node) {
return make_filter_range(make_range<WrappedSuccIterator>(
{succ_begin(Node.second), Node.first},
{succ_end(Node.second), Node.first}),
LoopBodyFilter{})
.end();
}
};
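
A minimal usage sketch for these traits (the function name is hypothetical): enumerate the in-body successors of the entry node. LoopBodyFilter drops successors that lie outside the loop as well as the header itself, so edges back to the header never show up.

static void dumpBodySuccessors(const Loop &L) {
  LoopBodyTraits::NodeRef Entry = LoopBodyTraits::getEntryNode(L);
  for (auto I = LoopBodyTraits::child_begin(Entry),
            E = LoopBodyTraits::child_end(Entry);
       I != E; ++I)
    dbgs() << (*I).second->getName() << "\n"; // the BasicBlock half of the pair
}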
/// Returns true if the given loop body has a cycle, not counting the cycle
/// formed by the loop itself.
static bool hasCyclesInLoopBody(const Loop &L) {
if (!L.empty())
return true;
for (const auto &SCC :
make_range(scc_iterator<Loop, LoopBodyTraits>::begin(L),
scc_iterator<Loop, LoopBodyTraits>::end(L))) {
if (SCC.size() > 1) {
DEBUG(dbgs() << "LVL: Detected a cycle in the loop body:\n");
DEBUG(L.dump());
return true;
}
}
return false;
}
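
A note on the SCC.size() > 1 test (this reasoning is inferred, not stated in the change): the only single-node cycle would be a block branching to itself, and a self-loop is a natural loop that LoopInfo records as a subloop, so that case is already rejected by the !L.empty() check above.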
/// \brief This modifies LoopAccessReport to initialize the message with a
/// loop-vectorizer-specific part.
class VectorizationReport : public LoopAccessReport {
@ -1782,12 +1858,14 @@ class LoopVectorizationRequirements {
Instruction *UnsafeAlgebraInst;
};
static void addInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) {
if (L.empty())
return V.push_back(&L);
static void addAcyclicInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) {
if (L.empty()) {
if (!hasCyclesInLoopBody(L))
V.push_back(&L);
return;
}
for (Loop *InnerL : L)
addInnerLoop(*InnerL, V);
addAcyclicInnerLoop(*InnerL, V);
}
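
For a nest L1 { L2 { L3 } }, for example, the recursion descends past the non-empty L1 and L2 and pushes only the innermost L3, and only if hasCyclesInLoopBody(L3) returns false.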
/// The LoopVectorize Pass.
@ -4395,6 +4473,9 @@ bool LoopVectorizationLegality::canVectorize() {
return false;
}
// FIXME: The code is currently dead, since the loop that gets sent to
// LoopVectorizationLegality is already an innermost loop.
//
// We can only vectorize innermost loops.
if (!TheLoop->empty()) {
emitAnalysis(VectorizationReport() << "loop is not the innermost loop");
@ -6639,7 +6720,7 @@ bool LoopVectorizePass::runImpl(
SmallVector<Loop *, 8> Worklist;
for (Loop *L : *LI)
addInnerLoop(*L, Worklist);
addAcyclicInnerLoop(*L, Worklist);
LoopsAnalyzed += Worklist.size();