Merge llvm, clang, lld and lldb trunk r291476.

commit 618592e561

@@ -1491,6 +1491,8 @@ class ScalarEvolution {

void print(raw_ostream &OS) const;
void verify() const;
bool invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv);

/// Collect parametric terms occurring in step expressions (first step of
/// delinearization).

@@ -290,7 +290,7 @@ class TargetLibraryInfo {
}

/// Returns extension attribute kind to be used for i32 parameters
/// correpsonding to C-level int or unsigned int. May be zeroext, signext,
/// corresponding to C-level int or unsigned int. May be zeroext, signext,
/// or none.
Attribute::AttrKind getExtAttrForI32Param(bool Signed = true) const {
if (Impl->ShouldExtI32Param)

@@ -301,7 +301,7 @@ class TargetLibraryInfo {
}

/// Returns extension attribute kind to be used for i32 return values
/// correpsonding to C-level int or unsigned int. May be zeroext, signext,
/// corresponding to C-level int or unsigned int. May be zeroext, signext,
/// or none.
Attribute::AttrKind getExtAttrForI32Return(bool Signed = true) const {
if (Impl->ShouldExtI32Return)
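
A minimal usage sketch, assuming a CallInst `CI` and a TargetLibraryInfo `TLI` exist in the surrounding code (this call site is illustrative, not part of the patch):

    // Apply the extension attribute, if any, to the i32 return value and to
    // the first i32 parameter of an emitted libcall. Attribute index 0 is the
    // return value, index 1 the first parameter.
    if (Attribute::AttrKind ExtAttr = TLI.getExtAttrForI32Return(/*Signed=*/true))
      CI->addAttribute(AttributeSet::ReturnIndex, ExtAttr);
    if (Attribute::AttrKind ExtAttr = TLI.getExtAttrForI32Param(/*Signed=*/true))
      CI->addAttribute(1, ExtAttr);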

@@ -308,6 +308,16 @@ class MachineBasicBlock
// Iteration support for live in sets. These sets are kept in sorted
// order by their register number.
typedef LiveInVector::const_iterator livein_iterator;
#ifndef NDEBUG
/// Unlike livein_begin, this method does not check that the liveness
/// information is accurate. Still for debug purposes it may be useful
/// to have iterators that won't assert if the liveness information
/// is not current.
livein_iterator livein_begin_dbg() const { return LiveIns.begin(); }
iterator_range<livein_iterator> liveins_dbg() const {
return make_range(livein_begin_dbg(), livein_end());
}
#endif
livein_iterator livein_begin() const;
livein_iterator livein_end() const { return LiveIns.end(); }
bool livein_empty() const { return LiveIns.empty(); }
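
A hedged sketch of how the new debug-only accessors might be used (assumes a build where NDEBUG is not defined; `MBB` stands for an arbitrary MachineBasicBlock):

    // Dump live-in registers without asserting that liveness information is
    // up to date; liveins_dbg() exists only when NDEBUG is not defined.
    for (const auto &LiveIn : MBB.liveins_dbg())
      dbgs() << "live-in phys reg: " << LiveIn.PhysReg << "\n";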

@@ -148,8 +148,7 @@ class MachineFrameInfo {
/// grouping overaligned allocas into a "secondary stack frame" and
/// then only use a single alloca to allocate this frame and only a
/// single virtual register to access it. Currently, without such an
/// optimization, each such alloca gets it's own dynamic
/// realignment.
/// optimization, each such alloca gets its own dynamic realignment.
bool StackRealignable;

/// Whether the function has the \c alignstack attribute.

@@ -11,6 +11,7 @@
#define LLVM_DEBUGINFO_MSF_STREAMARRAY_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/DebugInfo/MSF/StreamRef.h"
#include "llvm/Support/Error.h"
#include <cassert>

@@ -107,7 +108,10 @@ class VarStreamArray {
Extractor E;
};

template <typename ValueType, typename Extractor> class VarStreamArrayIterator {
template <typename ValueType, typename Extractor>
class VarStreamArrayIterator
: public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>,
std::forward_iterator_tag, ValueType> {
typedef VarStreamArrayIterator<ValueType, Extractor> IterType;
typedef VarStreamArray<ValueType, Extractor> ArrayType;

@@ -144,41 +148,39 @@ template <typename ValueType, typename Extractor> class VarStreamArrayIterator {
return false;
}

bool operator!=(const IterType &R) { return !(*this == R); }

const ValueType &operator*() const {
assert(Array && !HasError);
return ThisValue;
}

IterType &operator++() {
// We are done with the current record, discard it so that we are
// positioned at the next record.
IterRef = IterRef.drop_front(ThisLen);
if (IterRef.getLength() == 0) {
// There is nothing after the current record, we must make this an end
// iterator.
moveToEnd();
} else {
// There is some data after the current record.
auto EC = Extract(IterRef, ThisLen, ThisValue);
if (EC) {
consumeError(std::move(EC));
markError();
} else if (ThisLen == 0) {
// An empty record? Make this an end iterator.
IterType &operator+=(std::ptrdiff_t N) {
while (N > 0) {
// We are done with the current record, discard it so that we are
// positioned at the next record.
IterRef = IterRef.drop_front(ThisLen);
if (IterRef.getLength() == 0) {
// There is nothing after the current record, we must make this an end
// iterator.
moveToEnd();
return *this;
} else {
// There is some data after the current record.
auto EC = Extract(IterRef, ThisLen, ThisValue);
if (EC) {
consumeError(std::move(EC));
markError();
return *this;
} else if (ThisLen == 0) {
// An empty record? Make this an end iterator.
moveToEnd();
return *this;
}
}
--N;
}
return *this;
}

IterType operator++(int) {
IterType Original = *this;
++*this;
return Original;
}

private:
void moveToEnd() {
Array = nullptr;

@@ -211,6 +213,16 @@ template <typename T> class FixedStreamArray {
assert(Stream.getLength() % sizeof(T) == 0);
}

bool operator==(const FixedStreamArray<T> &Other) const {
return Stream == Other.Stream;
}

bool operator!=(const FixedStreamArray<T> &Other) const {
return !(*this == Other);
}

FixedStreamArray &operator=(const FixedStreamArray &) = default;

const T &operator[](uint32_t Index) const {
assert(Index < size());
uint32_t Off = Index * sizeof(T);

@@ -226,6 +238,8 @@ template <typename T> class FixedStreamArray {

uint32_t size() const { return Stream.getLength() / sizeof(T); }

bool empty() const { return size() == 0; }

FixedStreamArrayIterator<T> begin() const {
return FixedStreamArrayIterator<T>(*this, 0);
}

@@ -240,36 +254,53 @@ template <typename T> class FixedStreamArray {
ReadableStreamRef Stream;
};

template <typename T> class FixedStreamArrayIterator {
template <typename T>
class FixedStreamArrayIterator
: public iterator_facade_base<FixedStreamArrayIterator<T>,
std::random_access_iterator_tag, T> {

public:
FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index)
: Array(Array), Index(Index) {}

bool operator==(const FixedStreamArrayIterator<T> &R) {
assert(&Array == &R.Array);
return Index == R.Index;
}

bool operator!=(const FixedStreamArrayIterator<T> &R) {
return !(*this == R);
FixedStreamArrayIterator<T> &
operator=(const FixedStreamArrayIterator<T> &Other) {
Array = Other.Array;
Index = Other.Index;
return *this;
}

const T &operator*() const { return Array[Index]; }

FixedStreamArrayIterator<T> &operator++() {
assert(Index < Array.size());
++Index;
bool operator==(const FixedStreamArrayIterator<T> &R) const {
assert(Array == R.Array);
return (Index == R.Index) && (Array == R.Array);
}

FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) {
Index += N;
return *this;
}

FixedStreamArrayIterator<T> operator++(int) {
FixedStreamArrayIterator<T> Original = *this;
++*this;
return Original;
FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) {
assert(Index >= N);
Index -= N;
return *this;
}

std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const {
assert(Array == R.Array);
assert(Index >= R.Index);
return Index - R.Index;
}

bool operator<(const FixedStreamArrayIterator<T> &RHS) const {
assert(Array == RHS.Array);
return Index < RHS.Index;
}

private:
const FixedStreamArray<T> &Array;
FixedStreamArray<T> Array;
uint32_t Index;
};
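
The pattern the two iterators above follow comes from llvm/ADT/iterator.h: derive from iterator_facade_base, supply a small set of core operations, and the base class synthesizes the rest (++, --, +, -, [], !=, <=, >, and so on). A self-contained sketch with an invented iterator, under that assumption:

    #include "llvm/ADT/iterator.h"
    #include <cstddef>
    #include <iterator>

    // Minimal random-access iterator over the integers 0, 1, 2, ... The
    // explicitly defined operations below are all that iterator_facade_base
    // needs; everything else is generated from them.
    class CountingIterator
        : public llvm::iterator_facade_base<CountingIterator,
                                            std::random_access_iterator_tag, int> {
    public:
      explicit CountingIterator(int Value = 0) : Value(Value) {}

      bool operator==(const CountingIterator &R) const { return Value == R.Value; }
      bool operator<(const CountingIterator &R) const { return Value < R.Value; }
      CountingIterator &operator+=(std::ptrdiff_t N) { Value += N; return *this; }
      CountingIterator &operator-=(std::ptrdiff_t N) { Value -= N; return *this; }
      std::ptrdiff_t operator-(const CountingIterator &R) const { return Value - R.Value; }
      const int &operator*() const { return Value; }

    private:
      int Value;
    };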

@@ -83,7 +83,7 @@ class SerializationTraits<
namespace remote {

class OrcRemoteTargetRPCAPI
: public rpc::SingleThreadedRPC<rpc::RawByteChannel> {
: public rpc::SingleThreadedRPCEndpoint<rpc::RawByteChannel> {
protected:
class ResourceIdMgr {
public:

@@ -108,7 +108,7 @@ class OrcRemoteTargetRPCAPI
public:
// FIXME: Remove constructors once MSVC supports synthesizing move-ops.
OrcRemoteTargetRPCAPI(rpc::RawByteChannel &C)
: rpc::SingleThreadedRPC<rpc::RawByteChannel>(C, true) {}
: rpc::SingleThreadedRPCEndpoint<rpc::RawByteChannel>(C, true) {}

class CallIntVoid
: public rpc::Function<CallIntVoid, int32_t(JITTargetAddress Addr)> {

@@ -702,7 +702,7 @@ class CanDeserialize {
/// sync.
template <typename ImplT, typename ChannelT, typename FunctionIdT,
typename SequenceNumberT>
class RPCBase {
class RPCEndpointBase {
protected:
class OrcRPCInvalid : public Function<OrcRPCInvalid, void()> {
public:

@@ -747,7 +747,7 @@ class RPCBase {

public:
/// Construct an RPC instance on a channel.
RPCBase(ChannelT &C, bool LazyAutoNegotiation)
RPCEndpointBase(ChannelT &C, bool LazyAutoNegotiation)
: C(C), LazyAutoNegotiation(LazyAutoNegotiation) {
// Hold ResponseId in a special variable, since we expect Response to be
// called relatively frequently, and want to avoid the map lookup.

@@ -788,15 +788,21 @@ class RPCBase {
return FnIdOrErr.takeError();
}

// Allocate a sequence number.
auto SeqNo = SequenceNumberMgr.getSequenceNumber();
assert(!PendingResponses.count(SeqNo) &&
"Sequence number already allocated");
SequenceNumberT SeqNo; // initialized in locked scope below.
{
// Lock the pending responses map and sequence number manager.
std::lock_guard<std::mutex> Lock(ResponsesMutex);

// Install the user handler.
PendingResponses[SeqNo] =
// Allocate a sequence number.
SeqNo = SequenceNumberMgr.getSequenceNumber();
assert(!PendingResponses.count(SeqNo) &&
"Sequence number already allocated");

// Install the user handler.
PendingResponses[SeqNo] =
detail::createResponseHandler<ChannelT, typename Func::ReturnType>(
std::move(Handler));
}

// Open the function call message.
if (auto Err = C.startSendMessage(FnId, SeqNo)) {

@@ -863,11 +869,33 @@ class RPCBase {
return detail::ReadArgs<ArgTs...>(Args...);
}

/// Abandon all outstanding result handlers.
///
/// This will call all currently registered result handlers to receive an
/// "abandoned" error as their argument. This is used internally by the RPC
/// in error situations, but can also be called directly by clients who are
/// disconnecting from the remote and don't or can't expect responses to their
/// outstanding calls. (Especially for outstanding blocking calls, calling
/// this function may be necessary to avoid dead threads).
void abandonPendingResponses() {
// Lock the pending responses map and sequence number manager.
std::lock_guard<std::mutex> Lock(ResponsesMutex);

for (auto &KV : PendingResponses)
KV.second->abandon();
PendingResponses.clear();
SequenceNumberMgr.reset();
}

protected:
// The LaunchPolicy type allows a launch policy to be specified when adding
// a function handler. See addHandlerImpl.
using LaunchPolicy = std::function<Error(std::function<Error()>)>;

FunctionIdT getInvalidFunctionId() const {
return FnIdAllocator.getInvalidId();
}

/// Add the given handler to the handler map and make it available for
/// autonegotiation and execution.
template <typename Func, typename HandlerT>

@@ -884,28 +912,32 @@ class RPCBase {
wrapHandler<Func>(std::move(Handler), std::move(Launch));
}

// Abandon all outstanding results.
void abandonPendingResponses() {
for (auto &KV : PendingResponses)
KV.second->abandon();
PendingResponses.clear();
SequenceNumberMgr.reset();
}

Error handleResponse(SequenceNumberT SeqNo) {
auto I = PendingResponses.find(SeqNo);
if (I == PendingResponses.end()) {
abandonPendingResponses();
return orcError(OrcErrorCode::UnexpectedRPCResponse);
using Handler = typename decltype(PendingResponses)::mapped_type;
Handler PRHandler;

{
// Lock the pending responses map and sequence number manager.
std::unique_lock<std::mutex> Lock(ResponsesMutex);
auto I = PendingResponses.find(SeqNo);

if (I != PendingResponses.end()) {
PRHandler = std::move(I->second);
PendingResponses.erase(I);
SequenceNumberMgr.releaseSequenceNumber(SeqNo);
} else {
// Unlock the pending results map to prevent recursive lock.
Lock.unlock();
abandonPendingResponses();
return orcError(OrcErrorCode::UnexpectedRPCResponse);
}
}

auto PRHandler = std::move(I->second);
PendingResponses.erase(I);
SequenceNumberMgr.releaseSequenceNumber(SeqNo);
assert(PRHandler &&
"If we didn't find a response handler we should have bailed out");

if (auto Err = PRHandler->handleResponse(C)) {
abandonPendingResponses();
SequenceNumberMgr.reset();
return Err;
}

@@ -915,7 +947,7 @@ class RPCBase {
FunctionIdT handleNegotiate(const std::string &Name) {
auto I = LocalFunctionIds.find(Name);
if (I == LocalFunctionIds.end())
return FnIdAllocator.getInvalidId();
return getInvalidFunctionId();
return I->second;
}

@@ -938,7 +970,7 @@ class RPCBase {

// If autonegotiation indicates that the remote end doesn't support this
// function, return an unknown function error.
if (RemoteId == FnIdAllocator.getInvalidId())
if (RemoteId == getInvalidFunctionId())
return orcError(OrcErrorCode::UnknownRPCFunction);

// Autonegotiation succeeded and returned a valid id. Update the map and

@@ -1012,6 +1044,7 @@ class RPCBase {

std::map<FunctionIdT, WrappedHandlerFn> Handlers;

std::mutex ResponsesMutex;
detail::SequenceNumberManager<SequenceNumberT> SequenceNumberMgr;
std::map<SequenceNumberT, std::unique_ptr<detail::ResponseHandler<ChannelT>>>
PendingResponses;

@@ -1021,17 +1054,18 @@ class RPCBase {

template <typename ChannelT, typename FunctionIdT = uint32_t,
typename SequenceNumberT = uint32_t>
class MultiThreadedRPC
: public detail::RPCBase<
MultiThreadedRPC<ChannelT, FunctionIdT, SequenceNumberT>, ChannelT,
FunctionIdT, SequenceNumberT> {
class MultiThreadedRPCEndpoint
: public detail::RPCEndpointBase<
MultiThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT> {
private:
using BaseClass =
detail::RPCBase<MultiThreadedRPC<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT>;
detail::RPCEndpointBase<
MultiThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT>;

public:
MultiThreadedRPC(ChannelT &C, bool LazyAutoNegotiation)
MultiThreadedRPCEndpoint(ChannelT &C, bool LazyAutoNegotiation)
: BaseClass(C, LazyAutoNegotiation) {}

/// The LaunchPolicy type allows a launch policy to be specified when adding

@@ -1061,30 +1095,41 @@ class MultiThreadedRPC
std::move(Launch));
}

/// Add a class-method as a handler.
template <typename Func, typename ClassT, typename RetT, typename... ArgTs>
void addHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...),
LaunchPolicy Launch = LaunchPolicy()) {
addHandler<Func>(
detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method),
Launch);
}

/// Negotiate a function id for Func with the other end of the channel.
template <typename Func> Error negotiateFunction() {
template <typename Func> Error negotiateFunction(bool Retry = false) {
using OrcRPCNegotiate = typename BaseClass::OrcRPCNegotiate;

// Check if we already have a function id...
auto I = this->RemoteFunctionIds.find(Func::getPrototype());
if (I != this->RemoteFunctionIds.end()) {
// If it's valid there's nothing left to do.
if (I->second != this->getInvalidFunctionId())
return Error::success();
// If it's invalid and we can't re-attempt negotiation, throw an error.
if (!Retry)
return orcError(OrcErrorCode::UnknownRPCFunction);
}

// We don't have a function id for Func yet, call the remote to try to
// negotiate one.
if (auto RemoteIdOrErr = callB<OrcRPCNegotiate>(Func::getPrototype())) {
this->RemoteFunctionIds[Func::getPrototype()] = *RemoteIdOrErr;
if (*RemoteIdOrErr == this->getInvalidFunctionId())
return orcError(OrcErrorCode::UnknownRPCFunction);
return Error::success();
} else
return RemoteIdOrErr.takeError();
}

/// Convenience method for negotiating multiple functions at once.
template <typename Func> Error negotiateFunctions() {
return negotiateFunction<Func>();
}

/// Convenience method for negotiating multiple functions at once.
template <typename Func1, typename Func2, typename... Funcs>
Error negotiateFunctions() {
if (auto Err = negotiateFunction<Func1>())
return Err;
return negotiateFunctions<Func2, Funcs...>();
}

/// Return type for non-blocking call primitives.
template <typename Func>
using NonBlockingCallResult = typename detail::ResultTraits<
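
A hedged client-side sketch of the negotiation interface above (the endpoint `Client` and the RPC function classes CallIntVoid / CallMain are assumed to be declared elsewhere, e.g. in OrcRemoteTargetRPCAPI):

    // Negotiate several function ids up front; on failure the returned Error
    // carries OrcErrorCode::UnknownRPCFunction.
    if (Error Err = Client.negotiateFunctions<CallIntVoid, CallMain>())
      return Err;

    // A previously failed negotiation can now be re-attempted by passing
    // Retry = true; without it, a cached invalid id is an immediate error.
    if (Error Err = Client.negotiateFunction<CallIntVoid>(/*Retry=*/true))
      return Err;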

@@ -1169,19 +1214,20 @@ class MultiThreadedRPC

template <typename ChannelT, typename FunctionIdT = uint32_t,
typename SequenceNumberT = uint32_t>
class SingleThreadedRPC
: public detail::RPCBase<
SingleThreadedRPC<ChannelT, FunctionIdT, SequenceNumberT>, ChannelT,
FunctionIdT, SequenceNumberT> {
class SingleThreadedRPCEndpoint
: public detail::RPCEndpointBase<
SingleThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT> {
private:
using BaseClass =
detail::RPCBase<SingleThreadedRPC<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT>;
detail::RPCEndpointBase<
SingleThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT>;

using LaunchPolicy = typename BaseClass::LaunchPolicy;

public:
SingleThreadedRPC(ChannelT &C, bool LazyAutoNegotiation)
SingleThreadedRPCEndpoint(ChannelT &C, bool LazyAutoNegotiation)
: BaseClass(C, LazyAutoNegotiation) {}

template <typename Func, typename HandlerT>

@@ -1197,29 +1243,31 @@ class SingleThreadedRPC
}

/// Negotiate a function id for Func with the other end of the channel.
template <typename Func> Error negotiateFunction() {
template <typename Func> Error negotiateFunction(bool Retry = false) {
using OrcRPCNegotiate = typename BaseClass::OrcRPCNegotiate;

// Check if we already have a function id...
auto I = this->RemoteFunctionIds.find(Func::getPrototype());
if (I != this->RemoteFunctionIds.end()) {
// If it's valid there's nothing left to do.
if (I->second != this->getInvalidFunctionId())
return Error::success();
// If it's invalid and we can't re-attempt negotiation, throw an error.
if (!Retry)
return orcError(OrcErrorCode::UnknownRPCFunction);
}

// We don't have a function id for Func yet, call the remote to try to
// negotiate one.
if (auto RemoteIdOrErr = callB<OrcRPCNegotiate>(Func::getPrototype())) {
this->RemoteFunctionIds[Func::getPrototype()] = *RemoteIdOrErr;
if (*RemoteIdOrErr == this->getInvalidFunctionId())
return orcError(OrcErrorCode::UnknownRPCFunction);
return Error::success();
} else
return RemoteIdOrErr.takeError();
}

/// Convenience method for negotiating multiple functions at once.
template <typename Func> Error negotiateFunctions() {
return negotiateFunction<Func>();
}

/// Convenience method for negotiating multiple functions at once.
template <typename Func1, typename Func2, typename... Funcs>
Error negotiateFunctions() {
if (auto Err = negotiateFunction<Func1>())
return Err;
return negotiateFunctions<Func2, Funcs...>();
}

template <typename Func, typename... ArgTs,
typename AltRetT = typename Func::ReturnType>
typename detail::ResultTraits<AltRetT>::ErrorReturnType

@@ -1332,6 +1380,68 @@ template <typename RPCClass> class ParallelCallGroup {
uint32_t NumOutstandingCalls;
};

/// @brief Convenience class for grouping RPC Functions into APIs that can be
/// negotiated as a block.
///
template <typename... Funcs>
class APICalls {
public:

/// @brief Test whether this API contains Function F.
template <typename F>
class Contains {
public:
static const bool value = false;
};

/// @brief Negotiate all functions in this API.
template <typename RPCEndpoint>
static Error negotiate(RPCEndpoint &R) {
return Error::success();
}
};

template <typename Func, typename... Funcs>
class APICalls<Func, Funcs...> {
public:

template <typename F>
class Contains {
public:
static const bool value = std::is_same<F, Func>::value |
APICalls<Funcs...>::template Contains<F>::value;
};

template <typename RPCEndpoint>
static Error negotiate(RPCEndpoint &R) {
if (auto Err = R.template negotiateFunction<Func>())
return Err;
return APICalls<Funcs...>::negotiate(R);
}

};

template <typename... InnerFuncs, typename... Funcs>
class APICalls<APICalls<InnerFuncs...>, Funcs...> {
public:

template <typename F>
class Contains {
public:
static const bool value =
APICalls<InnerFuncs...>::template Contains<F>::value |
APICalls<Funcs...>::template Contains<F>::value;
};

template <typename RPCEndpoint>
static Error negotiate(RPCEndpoint &R) {
if (auto Err = APICalls<InnerFuncs...>::negotiate(R))
return Err;
return APICalls<Funcs...>::negotiate(R);
}

};

} // end namespace rpc
} // end namespace orc
} // end namespace llvm
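
A hypothetical sketch of how the new APICalls grouping might be used (the function classes and the `Endpoint` object are assumed to be in scope, e.g. under `using namespace llvm::orc`):

    // Group RPC functions into one API, check membership at compile time, and
    // negotiate the whole group with a single call.
    using RemoteExecuteAPI = rpc::APICalls<CallIntVoid, CallMain>;
    static_assert(RemoteExecuteAPI::Contains<CallIntVoid>::value,
                  "CallIntVoid should be part of the API");
    if (Error Err = RemoteExecuteAPI::negotiate(Endpoint))
      return Err;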

@@ -48,9 +48,7 @@ class RawByteChannel {
template <typename FunctionIdT, typename SequenceIdT>
Error startSendMessage(const FunctionIdT &FnId, const SequenceIdT &SeqNo) {
writeLock.lock();
if (auto Err = serializeSeq(*this, FnId, SeqNo))
return Err;
return Error::success();
return serializeSeq(*this, FnId, SeqNo);
}

/// Notify the channel that we're ending a message send.

@@ -28,14 +28,14 @@ template <> struct ScalarEnumerationTraits<TypeTestResolution::Kind> {

template <> struct MappingTraits<TypeTestResolution> {
static void mapping(IO &io, TypeTestResolution &res) {
io.mapRequired("Kind", res.TheKind);
io.mapRequired("SizeBitWidth", res.SizeBitWidth);
io.mapOptional("Kind", res.TheKind);
io.mapOptional("SizeBitWidth", res.SizeBitWidth);
}
};

template <> struct MappingTraits<TypeIdSummary> {
static void mapping(IO &io, TypeIdSummary& summary) {
io.mapRequired("TTRes", summary.TTRes);
io.mapOptional("TTRes", summary.TTRes);
}
};

@@ -53,7 +53,7 @@ namespace yaml {

template <> struct MappingTraits<FunctionSummaryYaml> {
static void mapping(IO &io, FunctionSummaryYaml& summary) {
io.mapRequired("TypeTests", summary.TypeTests);
io.mapOptional("TypeTests", summary.TypeTests);
}
};

@@ -100,8 +100,8 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {

template <> struct MappingTraits<ModuleSummaryIndex> {
static void mapping(IO &io, ModuleSummaryIndex& index) {
io.mapRequired("GlobalValueMap", index.GlobalValueMap);
io.mapRequired("TypeIdMap", index.TypeIdMap);
io.mapOptional("GlobalValueMap", index.GlobalValueMap);
io.mapOptional("TypeIdMap", index.TypeIdMap);
}
};
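
For context, a self-contained sketch of the mapRequired/mapOptional distinction in LLVM's YAML I/O (the ToolOptions struct is invented for illustration): mapOptional silently skips a missing key, optionally applying a default, while mapRequired reports an error for it.

    #include "llvm/Support/YAMLTraits.h"
    #include <string>

    struct ToolOptions {
      bool Verbose = false;
      std::string OutputFile;
    };

    namespace llvm {
    namespace yaml {
    template <> struct MappingTraits<ToolOptions> {
      static void mapping(IO &io, ToolOptions &Opts) {
        // A missing "Verbose" key is not an error; the field keeps the default.
        io.mapOptional("Verbose", Opts.Verbose, false);
        // A missing "OutputFile" key is diagnosed by the YAML reader.
        io.mapRequired("OutputFile", Opts.OutputFile);
      }
    };
    } // end namespace yaml
    } // end namespace llvm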

@@ -879,18 +879,22 @@ extern template class AnalysisManager<Function>;
/// \brief Convenience typedef for the Function analysis manager.
typedef AnalysisManager<Function> FunctionAnalysisManager;

/// \brief A module analysis which acts as a proxy for a function analysis
/// manager.
/// \brief An analysis over an "outer" IR unit that provides access to an
/// analysis manager over an "inner" IR unit. The inner unit must be contained
/// in the outer unit.
///
/// This primarily proxies invalidation information from the module analysis
/// manager and module pass manager to a function analysis manager. You should
/// never use a function analysis manager from within (transitively) a module
/// pass manager unless your parent module pass has received a proxy result
/// object for it.
/// Fore example, InnerAnalysisManagerProxy<FunctionAnalysisManager, Module> is
/// an analysis over Modules (the "outer" unit) that provides access to a
/// Function analysis manager. The FunctionAnalysisManager is the "inner"
/// manager being proxied, and Functions are the "inner" unit. The inner/outer
/// relationship is valid because each Function is contained in one Module.
///
/// Note that the proxy's result is a move-only object and represents ownership
/// of the validity of the analyses in the \c FunctionAnalysisManager it
/// provides.
/// If you're (transitively) within a pass manager for an IR unit U that
/// contains IR unit V, you should never use an analysis manager over V, except
/// via one of these proxies.
///
/// Note that the proxy's result is a move-only RAII object. The validity of
/// the analyses in the inner analysis manager is tied to its lifetime.
template <typename AnalysisManagerT, typename IRUnitT, typename... ExtraArgTs>
class InnerAnalysisManagerProxy
: public AnalysisInfoMixin<

@@ -926,23 +930,16 @@ class InnerAnalysisManagerProxy
/// \brief Accessor for the analysis manager.
AnalysisManagerT &getManager() { return *InnerAM; }

/// \brief Handler for invalidation of the outer IR unit.
/// \brief Handler for invalidation of the outer IR unit, \c IRUnitT.
///
/// If this analysis itself is preserved, then we assume that the set of \c
/// IR units that the inner analysis manager controls hasn't changed and
/// thus we don't need to invalidate *all* cached data associated with any
/// \c IRUnitT* in the \c AnalysisManagerT.
/// If the proxy analysis itself is not preserved, we assume that the set of
/// inner IR objects contained in IRUnit may have changed. In this case,
/// we have to call \c clear() on the inner analysis manager, as it may now
/// have stale pointers to its inner IR objects.
///
/// Regardless of whether this analysis is marked as preserved, all of the
/// analyses in the \c AnalysisManagerT are potentially invalidated (for
/// the relevant inner set of their IR units) based on the set of preserved
/// analyses.
///
/// Because this needs to understand the mapping from one IR unit to an
/// inner IR unit, this method isn't defined in the primary template.
/// Instead, each specialization of this template will need to provide an
/// explicit specialization of this method to handle that particular pair
/// of IR unit and inner AnalysisManagerT.
/// Regardless of whether the proxy analysis is marked as preserved, all of
/// the analyses in the inner analysis manager are potentially invalidated
/// based on the set of preserved analyses.
bool invalidate(
IRUnitT &IR, const PreservedAnalyses &PA,
typename AnalysisManager<IRUnitT, ExtraArgTs...>::Invalidator &Inv);

@@ -956,13 +953,9 @@ class InnerAnalysisManagerProxy

/// \brief Run the analysis pass and create our proxy result object.
///
/// This doesn't do any interesting work, it is primarily used to insert our
/// proxy result object into the module analysis cache so that we can proxy
/// invalidation to the function analysis manager.
///
/// In debug builds, it will also assert that the analysis manager is empty
/// as no queries should arrive at the function analysis manager prior to
/// this analysis being requested.
/// This doesn't do any interesting work; it is primarily used to insert our
/// proxy result object into the outer analysis cache so that we can proxy
/// invalidation to the inner analysis manager.
Result run(IRUnitT &IR, AnalysisManager<IRUnitT, ExtraArgTs...> &AM,
ExtraArgTs...) {
return Result(*InnerAM);

@@ -996,22 +989,24 @@ bool FunctionAnalysisManagerModuleProxy::Result::invalidate(
extern template class InnerAnalysisManagerProxy<FunctionAnalysisManager,
Module>;

/// \brief A function analysis which acts as a proxy for a module analysis
/// manager.
/// \brief An analysis over an "inner" IR unit that provides access to an
/// analysis manager over a "outer" IR unit. The inner unit must be contained
/// in the outer unit.
///
/// This primarily provides an accessor to a parent module analysis manager to
/// function passes. Only the const interface of the module analysis manager is
/// provided to indicate that once inside of a function analysis pass you
/// cannot request a module analysis to actually run. Instead, the user must
/// rely on the \c getCachedResult API.
/// For example OuterAnalysisManagerProxy<ModuleAnalysisManager, Function> is an
/// analysis over Functions (the "inner" unit) which provides access to a Module
/// analysis manager. The ModuleAnalysisManager is the "outer" manager being
/// proxied, and Modules are the "outer" IR unit. The inner/outer relationship
/// is valid because each Function is contained in one Module.
///
/// The invalidation provided by this proxy involves tracking when an
/// invalidation event in the outer analysis manager needs to trigger an
/// invalidation of a particular analysis on this IR unit.
/// This proxy only exposes the const interface of the outer analysis manager,
/// to indicate that you cannot cause an outer analysis to run from within an
/// inner pass. Instead, you must rely on the \c getCachedResult API.
///
/// Because outer analyses aren't invalidated while these IR units are being
/// precessed, we have to register and handle these as deferred invalidation
/// events.
/// This proxy doesn't manage invalidation in any way -- that is handled by the
/// recursive return path of each layer of the pass manager. A consequence of
/// this is the outer analyses may be stale. We invalidate the outer analyses
/// only when we're done running passes over the inner IR units.
template <typename AnalysisManagerT, typename IRUnitT, typename... ExtraArgTs>
class OuterAnalysisManagerProxy
: public AnalysisInfoMixin<
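
A hedged sketch (hypothetical pass and analysis names) of the usage pattern described above: inside a function pass, the outer (module) analysis manager is reachable only through the proxy, and only cached results may be queried.

    struct QueryModuleInfoPass : PassInfoMixin<QueryModuleInfoPass> {
      PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
        const ModuleAnalysisManager &MAM =
            FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
        // getCachedResult never runs the analysis; it returns null when the
        // result is not already available. SomeModuleAnalysis stands in for a
        // real module-level analysis.
        if (const auto *Info =
                MAM.getCachedResult<SomeModuleAnalysis>(*F.getParent()))
          (void)Info;
        return PreservedAnalyses::all();
      }
    };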

@@ -1024,7 +1019,7 @@ class OuterAnalysisManagerProxy

const AnalysisManagerT &getManager() const { return *AM; }

/// \brief Handle invalidation by ignoring it, this pass is immutable.
/// \brief Handle invalidation by ignoring it; this pass is immutable.
bool invalidate(
IRUnitT &, const PreservedAnalyses &,
typename AnalysisManager<IRUnitT, ExtraArgTs...>::Invalidator &) {

@@ -1089,18 +1084,15 @@ AnalysisKey

extern template class OuterAnalysisManagerProxy<ModuleAnalysisManager,
Function>;
/// Provide the \c ModuleAnalysisManager to \c Fucntion proxy.
/// Provide the \c ModuleAnalysisManager to \c Function proxy.
typedef OuterAnalysisManagerProxy<ModuleAnalysisManager, Function>
ModuleAnalysisManagerFunctionProxy;

/// \brief Trivial adaptor that maps from a module to its functions.
///
/// Designed to allow composition of a FunctionPass(Manager) and
/// a ModulePassManager. Note that if this pass is constructed with a pointer
/// to a \c ModuleAnalysisManager it will run the
/// \c FunctionAnalysisManagerModuleProxy analysis prior to running the function
/// pass over the module to enable a \c FunctionAnalysisManager to be used
/// within this run safely.
/// a ModulePassManager, by running the FunctionPass(Manager) over every
/// function in the module.
///
/// Function passes run within this adaptor can rely on having exclusive access
/// to the function they are run over. They should not read or modify any other

@@ -1115,6 +1107,10 @@ typedef OuterAnalysisManagerProxy<ModuleAnalysisManager, Function>
/// module.
/// FIXME: Make the above true for all of LLVM's actual passes, some still
/// violate this principle.
///
/// Note that although function passes can access module analyses, module
/// analyses are not invalidated while the function passes are running, so they
/// may be stale. Function analyses will not be stale.
template <typename FunctionPassT>
class ModuleToFunctionPassAdaptor
: public PassInfoMixin<ModuleToFunctionPassAdaptor<FunctionPassT>> {

@@ -1124,7 +1120,6 @@ class ModuleToFunctionPassAdaptor

/// \brief Runs the function pass across every function in the module.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
// Setup the function analysis manager from its proxy.
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

@@ -1145,10 +1140,11 @@ class ModuleToFunctionPassAdaptor
PA.intersect(std::move(PassPA));
}

// By definition we preserve the proxy. We also preserve all analyses on
// Function units. This precludes *any* invalidation of function analyses
// by the proxy, but that's OK because we've taken care to invalidate
// analyses in the function analysis manager incrementally above.
// The FunctionAnalysisManagerModuleProxy is preserved because (we assume)
// the function passes we ran didn't add or remove any functions.
//
// We also preserve all analyses on Functions, because we did all the
// invalidation we needed to do above.
PA.preserveSet<AllAnalysesOn<Function>>();
PA.preserve<FunctionAnalysisManagerModuleProxy>();
return PA;

@@ -1166,7 +1162,7 @@ createModuleToFunctionPassAdaptor(FunctionPassT Pass) {
return ModuleToFunctionPassAdaptor<FunctionPassT>(std::move(Pass));
}

/// \brief A template utility pass to force an analysis result to be available.
/// \brief A utility pass template to force an analysis result to be available.
///
/// If there are extra arguments at the pass's run level there may also be
/// extra arguments to the analysis manager's \c getResult routine. We can't

@@ -1196,17 +1192,14 @@ struct RequireAnalysisPass
}
};

/// \brief A template utility pass to force an analysis result to be
/// invalidated.
///
/// This is a no-op pass which simply forces a specific analysis result to be
/// invalidated when it is run.
/// \brief A no-op pass template which simply forces a specific analysis result
/// to be invalidated.
template <typename AnalysisT>
struct InvalidateAnalysisPass
: PassInfoMixin<InvalidateAnalysisPass<AnalysisT>> {
/// \brief Run this pass over some unit of IR.
///
/// This pass can be run over any unit of IR and use any analysis manager
/// This pass can be run over any unit of IR and use any analysis manager,
/// provided they satisfy the basic API requirements. When this pass is
/// created, these methods can be instantiated to satisfy whatever the
/// context requires.

@@ -1218,10 +1211,10 @@ struct InvalidateAnalysisPass
}
};

/// \brief A utility pass that does nothing but preserves no analyses.
/// \brief A utility pass that does nothing, but preserves no analyses.
///
/// As a consequence fo not preserving any analyses, this pass will force all
/// analysis passes to be re-run to produce fresh results if any are needed.
/// Because this preserves no analyses, any analysis passes queried after this
/// pass runs will recompute fresh results.
struct InvalidateAllAnalysesPass : PassInfoMixin<InvalidateAllAnalysesPass> {
/// \brief Run this pass over some unit of IR.
template <typename IRUnitT, typename AnalysisManagerT, typename... ExtraArgTs>

@@ -238,6 +238,26 @@ class User : public Value {
return make_range(value_op_begin(), value_op_end());
}

struct const_value_op_iterator
: iterator_adaptor_base<const_value_op_iterator, const_op_iterator,
std::random_access_iterator_tag, const Value *,
ptrdiff_t, const Value *, const Value *> {
explicit const_value_op_iterator(const Use *U = nullptr) :
iterator_adaptor_base(U) {}
const Value *operator*() const { return *I; }
const Value *operator->() const { return operator*(); }
};

const_value_op_iterator value_op_begin() const {
return const_value_op_iterator(op_begin());
}
const_value_op_iterator value_op_end() const {
return const_value_op_iterator(op_end());
}
iterator_range<const_value_op_iterator> operand_values() const {
return make_range(value_op_begin(), value_op_end());
}

/// \brief Drop all references to operands.
///
/// This function is in charge of "letting go" of all objects that this User
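
A usage sketch enabled by this change (the helper function is hypothetical): operand iteration now also works on a const User.

    static void printOperandNames(const llvm::User &U, llvm::raw_ostream &OS) {
      // operand_values() is now available on const objects and yields
      // const Value pointers.
      for (const llvm::Value *Op : U.operand_values())
        OS << Op->getName() << "\n";
    }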

@@ -207,6 +207,14 @@ void native(const Twine &path, SmallVectorImpl<char> &result);
/// @param path A path that is transformed to native format.
void native(SmallVectorImpl<char> &path);

/// @brief Replaces backslashes with slashes if Windows.
///
/// @param path processed path
/// @result The result of replacing backslashes with forward slashes if Windows.
/// On Unix, this function is a no-op because backslashes are valid path
/// chracters.
std::string convert_to_slash(StringRef path);

/// @}
/// @name Lexical Observers
/// @{

@@ -215,9 +215,20 @@ ModulePass *createMetaRenamerPass();
/// manager.
ModulePass *createBarrierNoopPass();

/// What to do with the summary when running the LowerTypeTests pass.
enum class LowerTypeTestsSummaryAction {
None, ///< Do nothing.
Import, ///< Import typeid resolutions from summary and globals.
Export, ///< Export typeid resolutions to summary and globals.
};

/// \brief This pass lowers type metadata and the llvm.type.test intrinsic to
/// bitsets.
ModulePass *createLowerTypeTestsPass();
/// \param Action What to do with the summary passed as Index.
/// \param Index The summary to use for importing or exporting, this can be null
/// when Action is None.
ModulePass *createLowerTypeTestsPass(LowerTypeTestsSummaryAction Action,
ModuleSummaryIndex *Index);

/// \brief This pass export CFI checks for use by external modules.
ModulePass *createCrossDSOCFIPass();
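
A hypothetical legacy-pass-manager sketch of the new interface (the pass manager `PM` and the ModuleSummaryIndex `Summary` are assumed to exist in the surrounding code); the index may be null whenever no import/export action is requested:

    // Export typeid resolutions into a combined summary.
    PM.add(createLowerTypeTestsPass(LowerTypeTestsSummaryAction::Export, &Summary));
    // Plain lowering with no summary interaction.
    PM.add(createLowerTypeTestsPass(LowerTypeTestsSummaryAction::None,
                                    /*Index=*/nullptr));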

@@ -21,7 +21,6 @@
#include <vector>

namespace llvm {
class ModuleSummaryIndex;
class Pass;
class TargetLibraryInfoImpl;
class TargetMachine;

@@ -1106,6 +1106,16 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse))
return V;

// udiv %V, C -> 0 if %V < C
if (MaxRecurse) {
if (Constant *C = dyn_cast_or_null<Constant>(SimplifyICmpInst(
ICmpInst::ICMP_ULT, Op0, Op1, Q, MaxRecurse - 1))) {
if (C->isAllOnesValue()) {
return Constant::getNullValue(Op0->getType());
}
}
}

return nullptr;
}

@@ -1247,6 +1257,16 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse))
return V;

// urem %V, C -> %V if %V < C
if (MaxRecurse) {
if (Constant *C = dyn_cast_or_null<Constant>(SimplifyICmpInst(
ICmpInst::ICMP_ULT, Op0, Op1, Q, MaxRecurse - 1))) {
if (C->isAllOnesValue()) {
return Op0;
}
}
}

return nullptr;
}

@@ -179,9 +179,9 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const {
}

bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const {
// For each block we check that it doesn't have any uses outside of it's
// innermost loop. This process will transitivelly guarntee that current loop
// and all of the nested loops are in the LCSSA form.
// For each block we check that it doesn't have any uses outside of its
// innermost loop. This process will transitively guarantee that the current
// loop and all of the nested loops are in LCSSA form.
return all_of(this->blocks(), [&](const BasicBlock *BB) {
return isBlockInLCSSAForm(*LI.getLoopFor(BB), *BB, DT);
});

@@ -344,38 +344,24 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
if (!InvariantGroupMD)
return MemDepResult::getUnknown();

Value *LoadOperand = LI->getPointerOperand();
// Take the ptr operand after all casts and geps 0. This way we can search
// cast graph down only.
Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();

// It's is not safe to walk the use list of global value, because function
// passes aren't allowed to look outside their functions.
// FIXME: this could be fixed by filtering instructions from outside
// of current function.
if (isa<GlobalValue>(LoadOperand))
return MemDepResult::getUnknown();

// Queue to process all pointers that are equivalent to load operand.
SmallVector<const Value *, 8> LoadOperandsQueue;
SmallSet<const Value *, 14> SeenValues;
auto TryInsertToQueue = [&](Value *V) {
if (SeenValues.insert(V).second)
LoadOperandsQueue.push_back(V);
};

TryInsertToQueue(LoadOperand);
LoadOperandsQueue.push_back(LoadOperand);
while (!LoadOperandsQueue.empty()) {
const Value *Ptr = LoadOperandsQueue.pop_back_val();
assert(Ptr);
if (isa<GlobalValue>(Ptr))
continue;

// Value comes from bitcast: Ptr = bitcast x. Insert x.
if (auto *BCI = dyn_cast<BitCastInst>(Ptr))
TryInsertToQueue(BCI->getOperand(0));
// Gep with zeros is equivalent to bitcast.
// FIXME: we are not sure if some bitcast should be canonicalized to gep 0
// or gep 0 to bitcast because of SROA, so there are 2 forms. When typeless
// pointers will be upstream then both cases will be gone (and this BFS
// also won't be needed).
if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr))
if (GEP->hasAllZeroIndices())
TryInsertToQueue(GEP->getOperand(0));
assert(Ptr && !isa<GlobalValue>(Ptr) &&
"Null or GlobalValue should not be inserted");

for (const Use &Us : Ptr->uses()) {
auto *U = dyn_cast<Instruction>(Us.getUser());

@@ -385,13 +371,17 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
// Bitcast or gep with zeros are using Ptr. Add to queue to check it's
// users. U = bitcast Ptr
if (isa<BitCastInst>(U)) {
TryInsertToQueue(U);
LoadOperandsQueue.push_back(U);
continue;
}
// U = getelementptr Ptr, 0, 0...
// Gep with zeros is equivalent to bitcast.
// FIXME: we are not sure if some bitcast should be canonicalized to gep 0
// or gep 0 to bitcast because of SROA, so there are 2 forms. When
// typeless pointers will be ready then both cases will be gone
// (and this BFS also won't be needed).
if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
if (GEP->hasAllZeroIndices()) {
TryInsertToQueue(U);
LoadOperandsQueue.push_back(U);
continue;
}

@@ -10012,6 +10012,18 @@ void ScalarEvolution::verify() const {
// TODO: Verify more things.
}

bool ScalarEvolution::invalidate(
Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv) {
// Invalidate the ScalarEvolution object whenever it isn't preserved or one
// of its dependencies is invalidated.
auto PAC = PA.getChecker<ScalarEvolutionAnalysis>();
return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
Inv.invalidate<AssumptionAnalysis>(F, PA) ||
Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
Inv.invalidate<LoopAnalysis>(F, PA);
}

AnalysisKey ScalarEvolutionAnalysis::Key;

ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
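
A hedged sketch of what this invalidation rule means for a transform pass (the snippet below would sit at the end of a pass's run method): to keep a cached ScalarEvolution alive, the pass must also preserve the analyses ScalarEvolution depends on.

    PreservedAnalyses PA;
    PA.preserve<ScalarEvolutionAnalysis>();
    // SCEV is invalidated anyway if any of these are not preserved.
    PA.preserve<AssumptionAnalysis>();
    PA.preserve<DominatorTreeAnalysis>();
    PA.preserve<LoopAnalysis>();
    return PA;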

@@ -3257,6 +3257,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
case Intrinsic::dbg_value:
return true;

case Intrinsic::bitreverse:
case Intrinsic::bswap:
case Intrinsic::ctlz:
case Intrinsic::ctpop:

@@ -429,7 +429,7 @@ class MetadataLoader::MetadataLoaderImpl {
/// Populate the index above to enable lazily loading of metadata, and load
/// the named metadata as well as the transitively referenced global
/// Metadata.
Expected<bool> lazyLoadModuleMetadataBlock(PlaceholderQueue &Placeholders);
Expected<bool> lazyLoadModuleMetadataBlock();

/// On-demand loading of a single metadata. Requires the index above to be
/// populated.

@@ -516,8 +516,8 @@ Error error(const Twine &Message) {
Message, make_error_code(BitcodeError::CorruptedBitcode));
}

Expected<bool> MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock(
PlaceholderQueue &Placeholders) {
Expected<bool>
MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
IndexCursor = Stream;
SmallVector<uint64_t, 64> Record;
// Get the abbrevs, and preload record positions to make them lazy-loadable.

@@ -701,7 +701,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
// then load individual record as needed, starting with the named metadata.
if (ModuleLevel && IsImporting && MetadataList.empty() &&
!DisableLazyLoading) {
auto SuccessOrErr = lazyLoadModuleMetadataBlock(Placeholders);
auto SuccessOrErr = lazyLoadModuleMetadataBlock();
if (!SuccessOrErr)
return SuccessOrErr.takeError();
if (SuccessOrErr.get()) {

@@ -1561,7 +1561,6 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
return error("Invalid record");

SmallVector<uint64_t, 64> Record;

PlaceholderQueue Placeholders;

while (true) {

@@ -1608,10 +1607,12 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(

auto Idx = Record[i + 1];
if (Idx < (MDStringRef.size() + GlobalMetadataBitPosIndex.size()) &&
!MetadataList.lookup(Idx))
!MetadataList.lookup(Idx)) {
// Load the attachment if it is in the lazy-loadable range and hasn't
// been loaded yet.
lazyLoadOneMetadata(Idx, Placeholders);
resolveForwardRefsAndPlaceholders(Placeholders);
}

Metadata *Node = MetadataList.getMetadataFwdRef(Idx);
if (isa<LocalAsMetadata>(Node))

@@ -1714,7 +1714,7 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
EVT CCT = getSetCCResultType(NVT);

// Hi part is always the same op
Hi = DAG.getNode(N->getOpcode(), DL, {NVT, NVT}, {LHSH, RHSH});
Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH});

// We need to know whether to select Lo part that corresponds to 'winning'
// Hi part or if Hi parts are equal.

@@ -1725,7 +1725,7 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL);

// Recursed Lo part if Hi parts are equal, this uses unsigned version
SDValue LoMinMax = DAG.getNode(LoOpc, DL, {NVT, NVT}, {LHSL, RHSL});
SDValue LoMinMax = DAG.getNode(LoOpc, DL, NVT, {LHSL, RHSL});

Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp);
}

@@ -381,7 +381,6 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
I != E; ++I) {
if (DCELimit != -1 && (int)NumDead >= DCELimit)
break;

int FirstSS, SecondSS;
if (TII->isStackSlotCopy(*I, FirstSS, SecondSS) && FirstSS == SecondSS &&
FirstSS != -1) {

@@ -392,12 +391,18 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
}

MachineBasicBlock::iterator NextMI = std::next(I);
if (NextMI == MBB->end()) continue;
MachineBasicBlock::iterator ProbableLoadMI = I;

unsigned LoadReg = 0;
unsigned StoreReg = 0;
if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS)))
continue;
// Skip the ...pseudo debugging... instructions between a load and store.
while ((NextMI != E) && NextMI->isDebugValue()) {
++NextMI;
++I;
}
if (NextMI == E) continue;
if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS)))
continue;
if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;

@@ -407,7 +412,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {

if (NextMI->findRegisterUseOperandIdx(LoadReg, true, nullptr) != -1) {
++NumDead;
toErase.push_back(&*I);
toErase.push_back(&*ProbableLoadMI);
}

toErase.push_back(&*NextMI);

@@ -896,6 +896,48 @@ uint32_t RuntimeDyldELF::getMatchingLoRelocation(uint32_t RelType,
return ELF::R_MIPS_NONE;
}

// Sometimes we don't need to create thunk for a branch.
// This typically happens when branch target is located
// in the same object file. In such case target is either
// a weak symbol or symbol in a different executable section.
// This function checks if branch target is located in the
// same object file and if distance between source and target
// fits R_AARCH64_CALL26 relocation. If both conditions are
// met, it emits direct jump to the target and returns true.
// Otherwise false is returned and thunk is created.
bool RuntimeDyldELF::resolveAArch64ShortBranch(
unsigned SectionID, relocation_iterator RelI,
const RelocationValueRef &Value) {
uint64_t Address;
if (Value.SymbolName) {
auto Loc = GlobalSymbolTable.find(Value.SymbolName);

// Don't create direct branch for external symbols.
if (Loc == GlobalSymbolTable.end())
return false;

const auto &SymInfo = Loc->second;
Address =
uint64_t(Sections[SymInfo.getSectionID()].getLoadAddressWithOffset(
SymInfo.getOffset()));
} else {
Address = uint64_t(Sections[Value.SectionID].getLoadAddress());
}
uint64_t Offset = RelI->getOffset();
uint64_t SourceAddress = Sections[SectionID].getLoadAddressWithOffset(Offset);

// R_AARCH64_CALL26 requires immediate to be in range -2^27 <= imm < 2^27
// If distance between source and target is out of range then we should
// create thunk.
if (!isInt<28>(Address + Value.Addend - SourceAddress))
return false;

resolveRelocation(Sections[SectionID], Offset, Address, RelI->getType(),
Value.Addend);

return true;
}

Expected<relocation_iterator>
RuntimeDyldELF::processRelocationRef(
unsigned SectionID, relocation_iterator RelI, const ObjectFile &O,
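
A worked example of the range test above, with invented addresses: an R_AARCH64_CALL26 branch encodes a signed 28-bit byte displacement (-2^27 <= disp < 2^27), so llvm::isInt<28> is the right predicate.

    uint64_t SourceAddr = 0x40000000;   // invented example addresses
    uint64_t TargetAddr = 0x40008000;
    int64_t Addend = 0;
    int64_t Disp = int64_t(TargetAddr + Addend) - int64_t(SourceAddr);
    bool NeedsThunk = !llvm::isInt<28>(Disp); // false here: 0x8000 fits easily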

@@ -1003,7 +1045,7 @@ RuntimeDyldELF::processRelocationRef(
(uint64_t)Section.getAddressWithOffset(i->second),
RelType, 0);
DEBUG(dbgs() << " Stub function found\n");
} else {
} else if (!resolveAArch64ShortBranch(SectionID, RelI, Value)) {
// Create a new stub function.
DEBUG(dbgs() << " Create a new stub function\n");
Stubs[Value] = Section.getStubOffset();

@@ -40,6 +40,9 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
void resolveAArch64Relocation(const SectionEntry &Section, uint64_t Offset,
uint64_t Value, uint32_t Type, int64_t Addend);

bool resolveAArch64ShortBranch(unsigned SectionID, relocation_iterator RelI,
const RelocationValueRef &Value);

void resolveARMRelocation(const SectionEntry &Section, uint64_t Offset,
uint32_t Value, uint32_t Type, int32_t Addend);
@@ -196,8 +196,15 @@ crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index,
  };

  FunctionImporter Importer(Index, Loader);
  if (!Importer.importFunctions(TheModule, ImportList))
  Expected<bool> Result = Importer.importFunctions(TheModule, ImportList);
  if (!Result) {
    handleAllErrors(Result.takeError(), [&](ErrorInfoBase &EIB) {
      SMDiagnostic Err = SMDiagnostic(TheModule.getModuleIdentifier(),
                                      SourceMgr::DK_Error, EIB.message());
      Err.print("ThinLTO", errs());
    });
    report_fatal_error("importFunctions failed");
  }
}

static void optimizeModule(Module &TheModule, TargetMachine &TM,

@@ -2823,7 +2823,11 @@ StringRef MachORebaseEntry::typeName() const {
}

bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const {
#ifdef EXPENSIVE_CHECKS
  assert(Opcodes == Other.Opcodes && "compare iterators of different files");
#else
  assert(Opcodes.data() == Other.Opcodes.data() && "compare iterators of different files");
#endif
  return (Ptr == Other.Ptr) &&
         (RemainingLoopCount == Other.RemainingLoopCount) &&
         (Done == Other.Done);
@@ -3073,7 +3077,11 @@ uint32_t MachOBindEntry::flags() const { return Flags; }
int MachOBindEntry::ordinal() const { return Ordinal; }

bool MachOBindEntry::operator==(const MachOBindEntry &Other) const {
#ifdef EXPENSIVE_CHECKS
  assert(Opcodes == Other.Opcodes && "compare iterators of different files");
#else
  assert(Opcodes.data() == Other.Opcodes.data() && "compare iterators of different files");
#endif
  return (Ptr == Other.Ptr) &&
         (RemainingLoopCount == Other.RemainingLoopCount) &&
         (Done == Other.Done);

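A side note, not from the patch: the cheap assert compares the underlying buffers for identity, while the EXPENSIVE_CHECKS variant compares contents element-wise. The same distinction, illustrated with std::vector instead of llvm::ArrayRef:

#include <cassert>
#include <vector>

int main() {
  std::vector<unsigned char> Opcodes = {0x11, 0x22};
  std::vector<unsigned char> Copy = Opcodes;  // equal contents, distinct buffer
  assert(Copy == Opcodes);                    // element-wise equality: true
  assert(Copy.data() != Opcodes.data());      // identity check: different buffers
  return 0;
}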
@@ -22,6 +22,12 @@
using namespace llvm;
using namespace object;

static llvm::cl::opt<bool> IgnoreEmptyThinLTOIndexFile(
    "ignore-empty-index-file", llvm::cl::ZeroOrMore,
    llvm::cl::desc(
        "Ignore an empty index file and perform non-ThinLTO compilation"),
    llvm::cl::init(false));

ModuleSummaryIndexObjectFile::ModuleSummaryIndexObjectFile(
    MemoryBufferRef Object, std::unique_ptr<ModuleSummaryIndex> I)
    : SymbolicFile(Binary::ID_ModuleSummaryIndex, Object), Index(std::move(I)) {
@@ -97,6 +103,8 @@ llvm::getModuleSummaryIndexForFile(StringRef Path) {
  if (EC)
    return errorCodeToError(EC);
  MemoryBufferRef BufferRef = (FileOrErr.get())->getMemBufferRef();
  if (IgnoreEmptyThinLTOIndexFile && !BufferRef.getBufferSize())
    return nullptr;
  Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
      object::ModuleSummaryIndexObjectFile::create(BufferRef);
  if (!ObjOrErr)

@@ -373,7 +373,7 @@ void Option::removeArgument() { GlobalParser->removeOption(this); }
void Option::setArgStr(StringRef S) {
  if (FullyInitialized)
    GlobalParser->updateArgStr(this, S);
  assert(S[0] != '-' && "Option can't start with '-");
  assert((S.empty() || S[0] != '-') && "Option can't start with '-");
  ArgStr = S;
}

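A side note, not from the patch: the rewritten assert matters because StringRef::operator[] asserts on an empty string, so the emptiness test has to short-circuit first. The same shape with plain std::string, as a minimal sketch:

#include <cassert>
#include <string>

static void checkArgStr(const std::string &S) {
  // Safe even for "": the emptiness test runs before S[0] is touched.
  assert((S.empty() || S[0] != '-') && "Option can't start with '-'");
}

int main() {
  checkArgStr("");      // fine
  checkArgStr("help");  // fine
  return 0;
}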
@@ -571,6 +571,16 @@ void native(SmallVectorImpl<char> &Path) {
#endif
}

std::string convert_to_slash(StringRef path) {
#ifdef LLVM_ON_WIN32
  std::string s = path.str();
  std::replace(s.begin(), s.end(), '\\', '/');
  return s;
#else
  return path;
#endif
}

StringRef filename(StringRef path) {
  return *rbegin(path);
}

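A hedged usage note, not from the diff: the new helper only rewrites separators when built for Windows; elsewhere the input comes back unchanged. A minimal sketch of a call site:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Path.h"
#include <string>

// On Windows, "dir\\file.txt" becomes "dir/file.txt"; on other hosts the
// string is returned as-is.
std::string ToSlash(llvm::StringRef P) {
  return llvm::sys::path::convert_to_slash(P);
}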
@@ -26,6 +26,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"

using namespace llvm;

@@ -109,27 +110,44 @@ static void writePaxHeader(raw_fd_ostream &OS, StringRef Path) {
  pad(OS);
}

// In the Ustar header, a path can be split at any '/' to store
// a path into UstarHeader::Name and UstarHeader::Prefix. This
// function splits a given path for that purpose.
static std::pair<StringRef, StringRef> splitPath(StringRef Path) {
  if (Path.size() <= sizeof(UstarHeader::Name))
    return {"", Path};
  size_t Sep = Path.rfind('/', sizeof(UstarHeader::Name) + 1);
  if (Sep == StringRef::npos)
    return {"", Path};
  return {Path.substr(0, Sep), Path.substr(Sep + 1)};
}

// Returns true if a given path can be stored to a Ustar header
// without the PAX extension.
static bool fitsInUstar(StringRef Path) {
  StringRef Prefix;
  StringRef Name;
  std::tie(Prefix, Name) = splitPath(Path);
  return Name.size() <= sizeof(UstarHeader::Name);
}

// The PAX header is an extended format, so a PAX header needs
// to be followed by a "real" header.
static void writeUstarHeader(raw_fd_ostream &OS, StringRef Path, size_t Size) {
  StringRef Prefix;
  StringRef Name;
  std::tie(Prefix, Name) = splitPath(Path);

  UstarHeader Hdr = {};
  memcpy(Hdr.Name, Path.data(), Path.size());
  memcpy(Hdr.Name, Name.data(), Name.size());
  memcpy(Hdr.Mode, "0000664", 8);
  snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", Size);
  memcpy(Hdr.Magic, "ustar", 6);
  memcpy(Hdr.Prefix, Prefix.data(), Prefix.size());
  computeChecksum(Hdr);
  OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr));
}

// We want to use '/' as a path separator even on Windows.
// This function canonicalizes a given path.
static std::string canonicalize(std::string S) {
#ifdef LLVM_ON_WIN32
  std::replace(S.begin(), S.end(), '\\', '/');
#endif
  return S;
}

// Creates a TarWriter instance and returns it.
Expected<std::unique_ptr<TarWriter>> TarWriter::create(StringRef OutputPath,
                                                       StringRef BaseDir) {
@@ -145,8 +163,8 @@ TarWriter::TarWriter(int FD, StringRef BaseDir)
// Append a given file to an archive.
void TarWriter::append(StringRef Path, StringRef Data) {
  // Write Path and Data.
  std::string S = BaseDir + "/" + canonicalize(Path) + "\0";
  if (S.size() <= sizeof(UstarHeader::Name)) {
  std::string S = BaseDir + "/" + sys::path::convert_to_slash(Path) + "\0";
  if (fitsInUstar(S)) {
    writeUstarHeader(OS, S, Data.size());
  } else {
    writePaxHeader(OS, S);

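A side note, not from the commit: the Name/Prefix split above can be modelled on plain strings, assuming the 100-byte Ustar name field; leading directories are pushed into Prefix only when the whole path does not fit in Name. A minimal sketch:

#include <cstddef>
#include <string>
#include <utility>

// Hypothetical stand-in for sizeof(UstarHeader::Name).
constexpr size_t NameFieldSize = 100;

static std::pair<std::string, std::string> splitUstarPath(const std::string &Path) {
  if (Path.size() <= NameFieldSize)
    return {"", Path};                          // short path: Name alone holds it
  size_t Sep = Path.rfind('/', NameFieldSize + 1);
  if (Sep == std::string::npos)
    return {"", Path};                          // no usable separator: caller falls back to PAX
  return {Path.substr(0, Sep), Path.substr(Sep + 1)};
}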
@@ -608,6 +608,10 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);

@@ -172,16 +172,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
  setOperationAction(ISD::STORE, MVT::v2f64, Promote);
  AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v4i32);

  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);

  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);

  setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
  setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);

  setTruncStoreAction(MVT::i64, MVT::i1, Expand);
  setTruncStoreAction(MVT::i64, MVT::i8, Expand);
  setTruncStoreAction(MVT::i64, MVT::i16, Expand);

@@ -822,6 +822,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
@@ -1630,31 +1631,44 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  return Match_Success;
}

// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}

bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  // What asm variants we should check
  std::vector<unsigned> MatchedVariants;
  if (getForcedEncodingSize() == 32) {
    MatchedVariants = {AMDGPUAsmVariants::DEFAULT};
  } else if (isForcedVOP3()) {
    MatchedVariants = {AMDGPUAsmVariants::VOP3};
  } else if (isForcedSDWA()) {
    MatchedVariants = {AMDGPUAsmVariants::SDWA};
  } else if (isForcedDPP()) {
    MatchedVariants = {AMDGPUAsmVariants::DPP};
  } else {
    MatchedVariants = {AMDGPUAsmVariants::DEFAULT,
                       AMDGPUAsmVariants::VOP3,
                       AMDGPUAsmVariants::SDWA,
                       AMDGPUAsmVariants::DPP};
  }

  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : MatchedVariants) {
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
@@ -3486,7 +3500,7 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if ((BasicInstType == SIInstrFlags::VOPC ||
    if ((BasicInstType == SIInstrFlags::VOPC ||
         BasicInstType == SIInstrFlags::VOP2)&&
        Op.isReg() &&
        Op.Reg.RegNo == AMDGPU::VCC) {

@@ -99,6 +99,18 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,

  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
  // We need to include these since trunc STORES to PRIVATE need
  // special handling to accommodate RMW
  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
  setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
  setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
  setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
  setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);

  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
  setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
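A side note, not from the patch: the read-modify-write (RMW) handling mentioned in the comment above amounts to loading the containing dword, clearing the target byte or halfword, merging in the shifted value, and storing the dword back. A scalar model of that sequence, as a sketch only:

#include <cstdint>

// Hypothetical scalar model of a truncating i8/i16 store into a 32-bit word.
static uint32_t storeSubDword(uint32_t Dword, uint32_t Value, unsigned ByteOffset,
                              uint32_t Mask /* 0xff or 0xffff */) {
  unsigned Shift = ByteOffset * 8;              // byte offset -> bit shift
  uint32_t Cleared = Dword & ~(Mask << Shift);  // clear the destination bits
  return Cleared | ((Value & Mask) << Shift);   // merge in the new bits
}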
@@ -1087,79 +1099,114 @@ void R600TargetLowering::getStackAddress(unsigned StackWidth,
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);
  //TODO: Who creates the i8 stores?
  assert(Store->isTruncatingStore()
         || Store->getValue().getValueType() == MVT::i8);
  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);

  unsigned Mask = 0;
  SDValue Mask;
  if (Store->getMemoryVT() == MVT::i8) {
    Mask = 0xff;
    assert(Store->getAlignment() >= 1);
    Mask = DAG.getConstant(0xff, DL, MVT::i32);
  } else if (Store->getMemoryVT() == MVT::i16) {
    Mask = 0xffff;
    assert(Store->getAlignment() >= 2);
    Mask = DAG.getConstant(0xffff, DL, MVT::i32);;
  } else {
    llvm_unreachable("Unsupported private trunc store");
  }

  SDValue Chain = Store->getChain();
  SDValue BasePtr = Store->getBasePtr();
  SDValue Offset = Store->getOffset();
  EVT MemVT = Store->getMemoryVT();

  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
                            DAG.getConstant(2, DL, MVT::i32));
  SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
                            Chain, Ptr,
                            DAG.getTargetConstant(0, DL, MVT::i32));
  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
  // Get dword location
  // TODO: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo());

  Chain = Dst.getValue(1);

  // Get offset in dword
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // Convert byte offset to bit shift
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // TODO: Contrary to the name of the functiom,
  // it also handles sub i32 non-truncating stores (like i1)
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Mask the value to the right type
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Shift the value in place
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                DAG.getConstant(Mask, DL, MVT::i32),
                                ShiftAmt);
  DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
                        DAG.getConstant(0xffffffff, DL, MVT::i32));
  // Shift the mask in place
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);

  // Invert the mask. NOTE: if we had native ROL instructions we could
  // use inverted mask
  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);

  // Cleanup the target bits
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Add the new bits
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
  return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                     Chain, Value, Ptr,
                     DAG.getTargetConstant(0, DL, MVT::i32));

  // Store dword
  // TODO: Can we be smarter about MachinePointerInfo?
  return DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
}

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();
  SDValue Value = StoreNode->getValue();
  EVT ValueVT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();
  unsigned Align = StoreNode->getAlignment();

  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();
  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();
  EVT PtrVT = Ptr.getValueType();

  SDLoc DL(Op);

  // Neither LOCAL nor PRIVATE can do vectors at the moment
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
      ValueVT.isVector()) {
    return SplitVectorStore(Op, DAG);
      VT.isVector()) {
    return scalarizeVectorStore(StoreNode, DAG);
  }

  // Private AS needs special fixes
  if (Align < MemVT.getStoreSize() && (AS != AMDGPUAS::PRIVATE_ADDRESS) &&
  unsigned Align = StoreNode->getAlignment();
  if (Align < MemVT.getStoreSize() &&
      !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
    return expandUnalignedStore(StoreNode, DAG);
  }

  SDLoc DL(Op);
  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();
  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
                                  DAG.getConstant(2, DL, PtrVT));

  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
    // It is beneficial to create MSKOR here instead of combiner to avoid
    // artificial dependencies introduced by RMW
    if (StoreNode->isTruncatingStore()) {
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
@@ -1169,15 +1216,19 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
        assert(StoreNode->getAlignment() >= 2);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, DL, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, DL, VT));

      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
                                      DAG.getConstant(0x00000003, DL, PtrVT));
      SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                     DAG.getConstant(3, DL, VT));

      // Put the mask in correct place
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);

      // Put the mask in correct place
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, DL, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);

      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
@@ -1191,12 +1242,9 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               ValueVT.bitsGE(MVT::i32)) {
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, DL, MVT::i32)));
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
@@ -1207,49 +1255,22 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
    }
  }

  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, DL, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, DL, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
  // Standard i32+ store, tag it with DWORDADDR to note that the address
  // has been shifted
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
  }

  return Chain;
  // Tagged i32+ stores will be matched by patterns
  return SDValue();
}

// return (512 + (kc_bank << 12)
@@ -1299,51 +1320,50 @@ SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  ISD::LoadExtType ExtType = Load->getExtensionType();
  EVT MemVT = Load->getMemoryVT();
  assert(Load->getAlignment() >= MemVT.getStoreSize());

  // <SI && AS=PRIVATE && EXTLOAD && size < 32bit,
  // register (2-)byte extract.
  SDValue BasePtr = Load->getBasePtr();
  SDValue Chain = Load->getChain();
  SDValue Offset = Load->getOffset();

  // Get Register holding the target.
  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
                            DAG.getConstant(2, DL, MVT::i32));
  // Load the Register.
  SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
                            Load->getChain(),
                            Ptr,
                            DAG.getTargetConstant(0, DL, MVT::i32),
                            Op.getOperand(2));
  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // NOTE: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo());

  // Get offset within the register.
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
                                Load->getBasePtr(),
                                DAG.getConstant(0x3, DL, MVT::i32));
                                LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));

  // Bit offset of target byte (byteIdx * 8).
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Shift to the right.
  Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);

  // Eliminate the upper bits by setting them to ...
  EVT MemEltVT = MemVT.getScalarType();

  // ... ones.
  if (ExtType == ISD::SEXTLOAD) {
  if (ExtType == ISD::SEXTLOAD) { // ... ones.
    SDValue MemEltVTNode = DAG.getValueType(MemEltVT);

    SDValue Ops[] = {
      DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
      Load->getChain()
    };

    return DAG.getMergeValues(Ops, DL);
    Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
  } else { // ... or zeros.
    Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
  }

  // ... or zeros.
  SDValue Ops[] = {
    DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
    Load->getChain()
    Ret,
    Read.getValue(1) // This should be our output chain
  };

  return DAG.getMergeValues(Ops, DL);
@@ -1365,12 +1385,10 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      scalarizeVectorLoad(LoadNode, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
       LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
      VT.isVector()) {
    return scalarizeVectorLoad(LoadNode, DAG);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
@@ -1421,8 +1439,6 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
    return DAG.getMergeValues(MergedValues, DL);
  }

  SDValue LoweredLoad;

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
@@ -1447,47 +1463,14 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT <= 4);
    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, DL, MVT::i32),
                             Op.getOperand(2));
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElemVT);
    LoweredLoad = DAG.getBuildVector(TargetVT, DL, makeArrayRef(Loads, NumElemVT));
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, DL, MVT::i32), // Channel
                              Op.getOperand(2));
  // DWORDADDR ISD marks already shifted address
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    assert(VT == MVT::i32);
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
    return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
  return SDValue();
}

SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {

@@ -1268,6 +1268,17 @@ let Predicates = [isR600] in {

defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;

// Hardcode channel to 0
// NOTE: LSHR is not available here. LSHR is per family instruction
def : Pat <
  (i32 (load_private ADDRIndirect:$addr) ),
  (R600_RegisterLoad FRAMEri:$addr, (i32 0))
>;
def : Pat <
  (store_private i32:$val, ADDRIndirect:$addr),
  (R600_RegisterStore i32:$val, FRAMEri:$addr, (i32 0))
>;


//===----------------------------------------------------------------------===//
// Pseudo instructions

@@ -99,6 +99,18 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
  setOperationAction(ISD::STORE, MVT::v16i32, Custom);
  setOperationAction(ISD::STORE, MVT::i1, Custom);

  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
  setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
  setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
  setTruncStoreAction(MVT::v32i32, MVT::v32i16, Expand);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Expand);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Expand);
  setTruncStoreAction(MVT::v8i32, MVT::v8i8, Expand);
  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
  setTruncStoreAction(MVT::v32i32, MVT::v32i8, Expand);


  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::v2i64, Expand);
@@ -699,7 +711,8 @@ SDValue SITargetLowering::LowerParameterPtr(SelectionDAG &DAG,

SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
                                         const SDLoc &SL, SDValue Chain,
                                         unsigned Offset, bool Signed) const {
                                         unsigned Offset, bool Signed,
                                         const ISD::InputArg *Arg) const {
  const DataLayout &DL = DAG.getDataLayout();
  Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
  PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
@@ -713,20 +726,21 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
                     MachineMemOperand::MODereferenceable |
                     MachineMemOperand::MOInvariant);

  SDValue Val;
  SDValue Val = Load;
  if (Arg && (Arg->Flags.isSExt() || Arg->Flags.isZExt()) &&
      VT.bitsLT(MemVT)) {
    unsigned Opc = Arg->Flags.isZExt() ? ISD::AssertZext : ISD::AssertSext;
    Val = DAG.getNode(Opc, SL, MemVT, Val, DAG.getValueType(VT));
  }

  if (MemVT.isFloatingPoint())
    Val = getFPExtOrFPTrunc(DAG, Load, SL, VT);
    Val = getFPExtOrFPTrunc(DAG, Val, SL, VT);
  else if (Signed)
    Val = DAG.getSExtOrTrunc(Load, SL, VT);
    Val = DAG.getSExtOrTrunc(Val, SL, VT);
  else
    Val = DAG.getZExtOrTrunc(Load, SL, VT);
    Val = DAG.getZExtOrTrunc(Val, SL, VT);

  SDValue Ops[] = {
    Val,
    Load.getValue(1)
  };

  return DAG.getMergeValues(Ops, SL);
  return DAG.getMergeValues({ Val, Load.getValue(1) }, SL);
}

SDValue SITargetLowering::LowerFormalArguments(
@@ -899,7 +913,8 @@ SDValue SITargetLowering::LowerFormalArguments(
      // The first 36 bytes of the input buffer contains information about
      // thread group and global sizes.
      SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, Chain,
                                   Offset, Ins[i].Flags.isSExt());
                                   Offset, Ins[i].Flags.isSExt(),
                                   &Ins[i]);
      Chains.push_back(Arg.getValue(1));

      auto *ParamTy =

@@ -24,7 +24,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
  SDValue LowerParameterPtr(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain,
                            unsigned Offset) const;
  SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL,
                         SDValue Chain, unsigned Offset, bool Signed) const;
                         SDValue Chain, unsigned Offset, bool Signed,
                         const ISD::InputArg *Arg = nullptr) const;
  SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
                             SelectionDAG &DAG) const override;
  SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,

@@ -203,8 +203,8 @@ unsigned AVRDAGToDAGISel::selectIndexedProgMemLoad(const LoadSDNode *LD,
bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
                                                   unsigned ConstraintCode,
                                                   std::vector<SDValue> &OutOps) {
  assert(ConstraintCode == InlineAsm::Constraint_m ||
         ConstraintCode == InlineAsm::Constraint_Q &&
  assert((ConstraintCode == InlineAsm::Constraint_m ||
          ConstraintCode == InlineAsm::Constraint_Q) &&
         "Unexpected asm memory constraint");

  MachineRegisterInfo &RI = MF->getRegInfo();

@@ -14,6 +14,7 @@

#include "AVRISelLowering.h"

#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -1933,5 +1934,45 @@ void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

unsigned AVRTargetLowering::getRegisterByName(const char *RegName,
                                              EVT VT,
                                              SelectionDAG &DAG) const {
  unsigned Reg;

  if (VT == MVT::i8) {
    Reg = StringSwitch<unsigned>(RegName)
      .Case("r0", AVR::R0).Case("r1", AVR::R1).Case("r2", AVR::R2)
      .Case("r3", AVR::R3).Case("r4", AVR::R4).Case("r5", AVR::R5)
      .Case("r6", AVR::R6).Case("r7", AVR::R7).Case("r8", AVR::R8)
      .Case("r9", AVR::R9).Case("r10", AVR::R10).Case("r11", AVR::R11)
      .Case("r12", AVR::R12).Case("r13", AVR::R13).Case("r14", AVR::R14)
      .Case("r15", AVR::R15).Case("r16", AVR::R16).Case("r17", AVR::R17)
      .Case("r18", AVR::R18).Case("r19", AVR::R19).Case("r20", AVR::R20)
      .Case("r21", AVR::R21).Case("r22", AVR::R22).Case("r23", AVR::R23)
      .Case("r24", AVR::R24).Case("r25", AVR::R25).Case("r26", AVR::R26)
      .Case("r27", AVR::R27).Case("r28", AVR::R28).Case("r29", AVR::R29)
      .Case("r30", AVR::R30).Case("r31", AVR::R31)
      .Case("X", AVR::R27R26).Case("Y", AVR::R29R28).Case("Z", AVR::R31R30)
      .Default(0);
  } else {
    Reg = StringSwitch<unsigned>(RegName)
      .Case("r0", AVR::R1R0).Case("r2", AVR::R3R2)
      .Case("r4", AVR::R5R4).Case("r6", AVR::R7R6)
      .Case("r8", AVR::R9R8).Case("r10", AVR::R11R10)
      .Case("r12", AVR::R13R12).Case("r14", AVR::R15R14)
      .Case("r16", AVR::R17R16).Case("r18", AVR::R19R18)
      .Case("r20", AVR::R21R20).Case("r22", AVR::R23R22)
      .Case("r24", AVR::R25R24).Case("r26", AVR::R27R26)
      .Case("r28", AVR::R29R28).Case("r30", AVR::R31R30)
      .Case("X", AVR::R27R26).Case("Y", AVR::R29R28).Case("Z", AVR::R31R30)
      .Default(0);
  }

  if (Reg)
    return Reg;

  report_fatal_error("Invalid register name global variable");
}

} // end of namespace llvm


@@ -116,6 +116,9 @@ class AVRTargetLowering : public TargetLowering {
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  unsigned getRegisterByName(const char* RegName, EVT VT,
                             SelectionDAG &DAG) const override;

private:
  SDValue getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AVRcc,
                    SelectionDAG &DAG, SDLoc dl) const;

@@ -13,15 +13,13 @@

#include "BPF.h"
#include "BPFInstrInfo.h"
#include "BPFSubtarget.h"
#include "BPFTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <iterator>

#define GET_INSTRINFO_CTOR_DTOR
#include "BPFGenInstrInfo.inc"
@@ -109,11 +107,11 @@ bool BPFInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
      while (std::next(I) != MBB.end())
        std::next(I)->eraseFromParent();
      Cond.clear();
      FBB = 0;
      FBB = nullptr;

      // Delete the J if it's equivalent to a fall-through.
      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
        TBB = 0;
        TBB = nullptr;
        I->eraseFromParent();
        I = MBB.end();
        continue;

@@ -12,16 +12,15 @@
//===----------------------------------------------------------------------===//

#include "BPF.h"
#include "BPFRegisterInfo.h"
#include "BPFSubtarget.h"
#include "MCTargetDesc/BPFMCTargetDesc.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include <cstdint>

using namespace llvm;

@@ -36,14 +35,15 @@ class BPFDisassembler : public MCDisassembler {
public:
  BPFDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
      : MCDisassembler(STI, Ctx) {}
  virtual ~BPFDisassembler() {}
  ~BPFDisassembler() override = default;

  DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
                              ArrayRef<uint8_t> Bytes, uint64_t Address,
                              raw_ostream &VStream,
                              raw_ostream &CStream) const override;
};
}

} // end anonymous namespace

static MCDisassembler *createBPFDisassembler(const Target &T,
                                             const MCSubtargetInfo &STI,

@@ -8,28 +8,24 @@
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>

using namespace llvm;

namespace {

class BPFAsmBackend : public MCAsmBackend {
public:
  bool IsLittleEndian;

  BPFAsmBackend(bool IsLittleEndian)
    : MCAsmBackend(), IsLittleEndian(IsLittleEndian) {}
  ~BPFAsmBackend() override {}
  ~BPFAsmBackend() override = default;

  void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
                  uint64_t Value, bool IsPCRel) const override;
@@ -53,6 +49,8 @@ class BPFAsmBackend : public MCAsmBackend {
  bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
};

} // end anonymous namespace

bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
  if ((Count % 8) != 0)
    return false;
@@ -66,7 +64,6 @@ bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
                               unsigned DataSize, uint64_t Value,
                               bool IsPCRel) const {

  if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) {
    assert(Value == 0);
  } else if (Fixup.getKind() == FK_Data_4 || Fixup.getKind() == FK_Data_8) {
@@ -92,7 +89,6 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
  return createBPFELFObjectWriter(OS, 0, IsLittleEndian);
}
}

MCAsmBackend *llvm::createBPFAsmBackend(const Target &T,
                                        const MCRegisterInfo &MRI,

@@ -10,29 +10,30 @@
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstdint>

using namespace llvm;

namespace {

class BPFELFObjectWriter : public MCELFObjectTargetWriter {
public:
  BPFELFObjectWriter(uint8_t OSABI);

  ~BPFELFObjectWriter() override;
  ~BPFELFObjectWriter() override = default;

protected:
  unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
                        const MCFixup &Fixup, bool IsPCRel) const override;
};
}

} // end anonymous namespace

BPFELFObjectWriter::BPFELFObjectWriter(uint8_t OSABI)
    : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_BPF,
                              /*HasRelocationAddend*/ false) {}

BPFELFObjectWriter::~BPFELFObjectWriter() {}

unsigned BPFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
                                          const MCFixup &Fixup,
                                          bool IsPCRel) const {

@@ -12,24 +12,25 @@
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>

using namespace llvm;

#define DEBUG_TYPE "mccodeemitter"

namespace {

class BPFMCCodeEmitter : public MCCodeEmitter {
  BPFMCCodeEmitter(const BPFMCCodeEmitter &) = delete;
  void operator=(const BPFMCCodeEmitter &) = delete;
  const MCInstrInfo &MCII;
  const MCRegisterInfo &MRI;
  bool IsLittleEndian;
@@ -38,8 +39,9 @@ class BPFMCCodeEmitter : public MCCodeEmitter {
  BPFMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
                   bool IsLittleEndian)
      : MCII(mcii), MRI(mri), IsLittleEndian(IsLittleEndian) {}

  ~BPFMCCodeEmitter() {}
  BPFMCCodeEmitter(const BPFMCCodeEmitter &) = delete;
  void operator=(const BPFMCCodeEmitter &) = delete;
  ~BPFMCCodeEmitter() override = default;

  // getBinaryCodeForInstr - TableGen'erated function for getting the
  // binary encoding for an instruction.
@@ -66,7 +68,8 @@ class BPFMCCodeEmitter : public MCCodeEmitter {
  void verifyInstructionPredicates(const MCInst &MI,
                                   uint64_t AvailableFeatures) const;
};
}

} // end anonymous namespace

MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII,
                                            const MCRegisterInfo &MRI,

@@ -12,14 +12,13 @@
//===----------------------------------------------------------------------===//

#include "BPF.h"
#include "BPFMCTargetDesc.h"
#include "BPFMCAsmInfo.h"
#include "InstPrinter/BPFInstPrinter.h"
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "MCTargetDesc/BPFMCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"

#define GET_INSTRINFO_MC_DESC
@@ -64,7 +63,7 @@ static MCInstPrinter *createBPFMCInstPrinter(const Triple &T,
                                             const MCRegisterInfo &MRI) {
  if (SyntaxVariant == 0)
    return new BPFInstPrinter(MAI, MII, MRI);
  return 0;
  return nullptr;
}

extern "C" void LLVMInitializeBPFTargetMC() {

@@ -101,7 +101,7 @@ LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T) {
}

LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T,
        const char* Triple, const char* CPU, const char* Features,
        const char *Triple, const char *CPU, const char *Features,
        LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc,
        LLVMCodeModel CodeModel) {
  Optional<Reloc::Model> RM;
@@ -139,7 +139,7 @@ LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T,

  TargetOptions opt;
  return wrap(unwrap(T)->createTargetMachine(Triple, CPU, Features, opt, RM,
      CM, OL));
                                             CM, OL));
}

void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) { delete unwrap(T); }

@@ -28,6 +28,7 @@ class FunctionPass;
// LLVM IR passes.
ModulePass *createWebAssemblyLowerEmscriptenEHSjLj(bool DoEH, bool DoSjLj);
void initializeWebAssemblyLowerEmscriptenEHSjLjPass(PassRegistry &);
ModulePass *createWebAssemblyFixFunctionBitcasts();
FunctionPass *createWebAssemblyOptimizeReturned();

// ISel and immediate followup passes.

@@ -0,0 +1,159 @@
//===-- WebAssemblyFixFunctionBitcasts.cpp - Fix function bitcasts --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Fix bitcasted functions.
///
/// WebAssembly requires caller and callee signatures to match, however in LLVM,
/// some amount of slop is vaguely permitted. Detect mismatch by looking for
/// bitcasts of functions and rewrite them to use wrapper functions instead.
///
/// This doesn't catch all cases, such as when a function's address is taken in
/// one place and casted in another, but it works for many common cases.
///
/// Note that LLVM already optimizes away function bitcasts in common cases by
/// dropping arguments as needed, so this pass only ends up getting used in less
/// common cases.
///
//===----------------------------------------------------------------------===//

#include "WebAssembly.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

#define DEBUG_TYPE "wasm-fix-function-bitcasts"

namespace {
class FixFunctionBitcasts final : public ModulePass {
  StringRef getPassName() const override {
    return "WebAssembly Fix Function Bitcasts";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    ModulePass::getAnalysisUsage(AU);
  }

  bool runOnModule(Module &M) override;

public:
  static char ID;
  FixFunctionBitcasts() : ModulePass(ID) {}
};
} // End anonymous namespace

char FixFunctionBitcasts::ID = 0;
ModulePass *llvm::createWebAssemblyFixFunctionBitcasts() {
  return new FixFunctionBitcasts();
}

// Recursively descend the def-use lists from V to find non-bitcast users of
// bitcasts of V.
static void FindUses(Value *V, Function &F,
                     SmallVectorImpl<std::pair<Use *, Function *>> &Uses) {
  for (Use &U : V->uses()) {
    if (BitCastOperator *BC = dyn_cast<BitCastOperator>(U.getUser()))
      FindUses(BC, F, Uses);
    else if (U.get()->getType() != F.getType())
      Uses.push_back(std::make_pair(&U, &F));
  }
}

// Create a wrapper function with type Ty that calls F (which may have a
// different type). Attempt to support common bitcasted function idioms:
//  - Call with more arguments than needed: arguments are dropped
//  - Call with fewer arguments than needed: arguments are filled in with undef
//  - Return value is not needed: drop it
//  - Return value needed but not present: supply an undef
//
// For now, return nullptr without creating a wrapper if the wrapper cannot
// be generated due to incompatible types.
static Function *CreateWrapper(Function *F, FunctionType *Ty) {
  Module *M = F->getParent();

  Function *Wrapper =
      Function::Create(Ty, Function::PrivateLinkage, "bitcast", M);
  BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper);

  // Determine what arguments to pass.
  SmallVector<Value *, 4> Args;
  Function::arg_iterator AI = Wrapper->arg_begin();
  FunctionType::param_iterator PI = F->getFunctionType()->param_begin();
  FunctionType::param_iterator PE = F->getFunctionType()->param_end();
  for (; AI != Wrapper->arg_end() && PI != PE; ++AI, ++PI) {
    if (AI->getType() != *PI) {
      Wrapper->eraseFromParent();
      return nullptr;
    }
    Args.push_back(&*AI);
  }
  for (; PI != PE; ++PI)
    Args.push_back(UndefValue::get(*PI));

  CallInst *Call = CallInst::Create(F, Args, "", BB);

  // Determine what value to return.
  if (Ty->getReturnType()->isVoidTy())
    ReturnInst::Create(M->getContext(), BB);
  else if (F->getFunctionType()->getReturnType()->isVoidTy())
    ReturnInst::Create(M->getContext(), UndefValue::get(Ty->getReturnType()),
                       BB);
  else if (F->getFunctionType()->getReturnType() == Ty->getReturnType())
    ReturnInst::Create(M->getContext(), Call, BB);
  else {
    Wrapper->eraseFromParent();
    return nullptr;
  }

  return Wrapper;
}

bool FixFunctionBitcasts::runOnModule(Module &M) {
  SmallVector<std::pair<Use *, Function *>, 0> Uses;

  // Collect all the places that need wrappers.
  for (Function &F : M)
    FindUses(&F, F, Uses);

  DenseMap<std::pair<Function *, FunctionType *>, Function *> Wrappers;

  for (auto &UseFunc : Uses) {
    Use *U = UseFunc.first;
    Function *F = UseFunc.second;
    PointerType *PTy = cast<PointerType>(U->get()->getType());
    FunctionType *Ty = dyn_cast<FunctionType>(PTy->getElementType());

    // If the function is casted to something like i8* as a "generic pointer"
    // to be later casted to something else, we can't generate a wrapper for it.
    // Just ignore such casts for now.
    if (!Ty)
      continue;

    auto Pair = Wrappers.insert(std::make_pair(std::make_pair(F, Ty), nullptr));
    if (Pair.second)
      Pair.first->second = CreateWrapper(F, Ty);

    Function *Wrapper = Pair.first->second;
    if (!Wrapper)
      continue;

    if (isa<Constant>(U->get()))
      U->get()->replaceAllUsesWith(Wrapper);
    else
      U->set(Wrapper);
  }

  return true;
}
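A side note, not part of the new pass: the mismatch it papers over is the C/C++ idiom of reaching a callee through a pointer of a different function type, which WebAssembly rejects at call time unless a wrapper with the expected signature is interposed. A minimal illustration (the cast alone is shown; calling through it would be undefined behaviour in ISO C++):

#include <cstdio>

static int callee(int X) { return X + 1; }

int main() {
  using NoArgFn = int (*)();
  // Mismatched function-pointer cast: the shape of the problem the pass fixes
  // by synthesizing a wrapper with the caller's signature.
  NoArgFn F = reinterpret_cast<NoArgFn>(&callee);
  (void)F;                        // intentionally not called here
  std::printf("%d\n", callee(1)); // direct, signature-correct call
  return 0;
}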
@@ -40,8 +40,8 @@ defm ROTL : BinaryInt<rotl, "rotl", 0x77, 0x89>;
defm ROTR : BinaryInt<rotr, "rotr", 0x78, 0x8a>;

let isCommutable = 1 in {
defm EQ : ComparisonInt<SETEQ, "eq ", 0x46, 0x68>;
defm NE : ComparisonInt<SETNE, "ne ", 0x47, 0x69>;
defm EQ : ComparisonInt<SETEQ, "eq ", 0x46, 0x51>;
defm NE : ComparisonInt<SETNE, "ne ", 0x47, 0x52>;
} // isCommutable = 1
defm LT_S : ComparisonInt<SETLT, "lt_s", 0x48, 0x53>;
defm LT_U : ComparisonInt<SETULT, "lt_u", 0x49, 0x54>;

@@ -163,6 +163,10 @@ void WebAssemblyPassConfig::addIRPasses() {
    // control specifically what gets lowered.
    addPass(createAtomicExpandPass(TM));

  // Fix function bitcasts, as WebAssembly requires caller and callee signatures
  // to match.
  addPass(createWebAssemblyFixFunctionBitcasts());

  // Optimize "returned" function attributes.
  if (getOptLevel() != CodeGenOpt::None)
    addPass(createWebAssemblyOptimizeReturned());

@@ -6962,23 +6962,24 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
}

/// Try to fold a build_vector that performs an 'addsub' to an X86ISD::ADDSUB
/// node.
static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
                             const X86Subtarget &Subtarget, SelectionDAG &DAG) {
/// Returns true iff \p BV builds a vector with the result equivalent to
/// the result of ADDSUB operation.
/// If true is returned then the operands of ADDSUB = Opnd0 +- Opnd1 operation
/// are written to the parameters \p Opnd0 and \p Opnd1.
static bool isAddSub(const BuildVectorSDNode *BV,
                     const X86Subtarget &Subtarget, SelectionDAG &DAG,
                     SDValue &Opnd0, SDValue &Opnd1) {

  MVT VT = BV->getSimpleValueType(0);
  if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
      (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
    return SDValue();
      (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&
      (!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
    return false;

  SDLoc DL(BV);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue InVec0 = DAG.getUNDEF(VT);
  SDValue InVec1 = DAG.getUNDEF(VT);

  assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
          VT == MVT::v2f64) && "build_vector with an invalid type found!");

  // Odd-numbered elements in the input build vector are obtained from
  // adding two integer/float elements.
  // Even-numbered elements in the input build vector are obtained from
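A side note, not from the diff: the ADDSUB shape being matched here can be written as a scalar loop over lanes, with the same pair of inputs feeding every lane — even lanes subtract, odd lanes add. A minimal sketch:

#include <cstddef>

// Hypothetical scalar model of the lane pattern X86ISD::ADDSUB implements.
static void addsub(const float *A, const float *B, float *Out, size_t N) {
  for (size_t I = 0; I < N; ++I)
    Out[I] = (I & 1) ? A[I] + B[I]   // odd lanes: add
                     : A[I] - B[I];  // even lanes: subtract
}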
@@ -7000,7 +7001,7 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,

    // Early exit if we found an unexpected opcode.
    if (Opcode != ExpectedOpcode)
      return SDValue();
      return false;

    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
@@ -7013,11 +7014,11 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
        !isa<ConstantSDNode>(Op0.getOperand(1)) ||
        !isa<ConstantSDNode>(Op1.getOperand(1)) ||
        Op0.getOperand(1) != Op1.getOperand(1))
      return SDValue();
      return false;

    unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
    if (I0 != i)
      return SDValue();
      return false;

    // We found a valid add/sub node. Update the information accordingly.
    if (i & 1)
@@ -7029,39 +7030,118 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
    if (InVec0.isUndef()) {
      InVec0 = Op0.getOperand(0);
      if (InVec0.getSimpleValueType() != VT)
        return SDValue();
        return false;
    }
    if (InVec1.isUndef()) {
      InVec1 = Op1.getOperand(0);
      if (InVec1.getSimpleValueType() != VT)
        return SDValue();
        return false;
    }

    // Make sure that operands in input to each add/sub node always
    // come from a same pair of vectors.
    if (InVec0 != Op0.getOperand(0)) {
      if (ExpectedOpcode == ISD::FSUB)
        return SDValue();
        return false;

      // FADD is commutable. Try to commute the operands
      // and then test again.
      std::swap(Op0, Op1);
      if (InVec0 != Op0.getOperand(0))
        return SDValue();
        return false;
    }

    if (InVec1 != Op1.getOperand(0))
      return SDValue();
      return false;

    // Update the pair of expected opcodes.
    std::swap(ExpectedOpcode, NextExpectedOpcode);
  }

  // Don't try to fold this build_vector into an ADDSUB if the inputs are undef.
  if (AddFound && SubFound && !InVec0.isUndef() && !InVec1.isUndef())
    return DAG.getNode(X86ISD::ADDSUB, DL, VT, InVec0, InVec1);
  if (!AddFound || !SubFound || InVec0.isUndef() || InVec1.isUndef())
    return false;

  return SDValue();
  Opnd0 = InVec0;
  Opnd1 = InVec1;
  return true;
}

/// Returns true if is possible to fold MUL and an idiom that has already been
/// recognized as ADDSUB(\p Opnd0, \p Opnd1) into FMADDSUB(x, y, \p Opnd1).
/// If (and only if) true is returned, the operands of FMADDSUB are written to
/// parameters \p Opnd0, \p Opnd1, \p Opnd2.
///
/// Prior to calling this function it should be known that there is some
/// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation
/// using \p Opnd0 and \p Opnd1 as operands. Also, this method is called
/// before replacement of such SDNode with ADDSUB operation. Thus the number
/// of \p Opnd0 uses is expected to be equal to 2.
/// For example, this function may be called for the following IR:
///   %AB = fmul fast <2 x double> %A, %B
///   %Sub = fsub fast <2 x double> %AB, %C
///   %Add = fadd fast <2 x double> %AB, %C
///   %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add,
///             <2 x i32> <i32 0, i32 3>
/// There is a def for %Addsub here, which potentially can be replaced by
/// X86ISD::ADDSUB operation:
///   %Addsub = X86ISD::ADDSUB %AB, %C
/// and such ADDSUB can further be replaced with FMADDSUB:
///   %Addsub = FMADDSUB %A, %B, %C.
///
/// The main reason why this method is called before the replacement of the
/// recognized ADDSUB idiom with ADDSUB operation is that such replacement
/// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
/// FMADDSUB is.
static bool isFMAddSub(const X86Subtarget &Subtarget, SelectionDAG &DAG,
                       SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2) {
  if (Opnd0.getOpcode() != ISD::FMUL || Opnd0->use_size() != 2 ||
      !Subtarget.hasAnyFMA())
    return false;

  // FIXME: These checks must match the similar ones in
  // DAGCombiner::visitFADDForFMACombine. It would be good to have one
  // function that would answer if it is Ok to fuse MUL + ADD to FMADD
  // or MUL + ADDSUB to FMADDSUB.
  const TargetOptions &Options = DAG.getTarget().Options;
  bool AllowFusion =
      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
  if (!AllowFusion)
    return false;

  Opnd2 = Opnd1;
  Opnd1 = Opnd0.getOperand(1);
  Opnd0 = Opnd0.getOperand(0);
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Try to fold a build_vector that performs an 'addsub' or 'fmaddsub' operation
|
||||
/// accordingly to X86ISD::ADDSUB or X86ISD::FMADDSUB node.
|
||||
static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
SDValue Opnd0, Opnd1;
|
||||
if (!isAddSub(BV, Subtarget, DAG, Opnd0, Opnd1))
|
||||
return SDValue();
|
||||
|
||||
MVT VT = BV->getSimpleValueType(0);
|
||||
SDLoc DL(BV);
|
||||
|
||||
// Try to generate X86ISD::FMADDSUB node here.
|
||||
SDValue Opnd2;
|
||||
if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2))
|
||||
return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
|
||||
|
||||
// Do not generate X86ISD::ADDSUB node for 512-bit types even though
|
||||
// the ADDSUB idiom has been successfully recognized. There are no known
|
||||
// X86 targets with 512-bit ADDSUB instructions!
|
||||
// 512-bit ADDSUB idiom recognition was needed only as part of FMADDSUB idiom
|
||||
// recognition.
|
||||
if (VT.is512BitVector())
|
||||
return SDValue();
|
||||
|
||||
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
|
||||
}
|
||||
|
||||
/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
|
||||
@ -7290,7 +7370,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
||||
return VectorConstant;
|
||||
|
||||
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
|
||||
if (SDValue AddSub = LowerToAddSub(BV, Subtarget, DAG))
|
||||
if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG))
|
||||
return AddSub;
|
||||
if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
|
||||
return HorizontalOp;
|
||||
@ -12965,6 +13045,12 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
if (Subtarget.hasVBMI())
|
||||
return lowerVectorShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG);
|
||||
|
||||
// Try to create an in-lane repeating shuffle mask and then shuffle the
|
||||
// the results into the target lanes.
|
||||
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
|
||||
DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
|
||||
return V;
|
||||
|
||||
// FIXME: Implement direct support for this type!
|
||||
return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
|
||||
}
|
||||
@ -16985,9 +17071,16 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst);
|
||||
}
|
||||
|
||||
if (Cond.getOpcode() == ISD::SETCC)
|
||||
if (SDValue NewCond = LowerSETCC(Cond, DAG))
|
||||
if (Cond.getOpcode() == ISD::SETCC) {
|
||||
if (SDValue NewCond = LowerSETCC(Cond, DAG)) {
|
||||
Cond = NewCond;
|
||||
// If the condition was updated, it's possible that the operands of the
|
||||
// select were also updated (for example, EmitTest has a RAUW). Refresh
|
||||
// the local references to the select operands in case they got stale.
|
||||
Op1 = Op.getOperand(1);
|
||||
Op2 = Op.getOperand(2);
|
||||
}
|
||||
}
|
||||
|
||||
// (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
|
||||
// (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
|
||||
@ -17193,22 +17286,26 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
|
||||
if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI())
|
||||
return SDValue();
|
||||
|
||||
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) {
|
||||
if (VT.is512BitVector() && InVTElt != MVT::i1) {
|
||||
if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT)
|
||||
return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0));
|
||||
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
|
||||
}
|
||||
|
||||
assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
|
||||
assert (InVTElt == MVT::i1 && "Unexpected vector type");
|
||||
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
|
||||
SDValue NegOne = DAG.getConstant(
|
||||
APInt::getAllOnesValue(ExtVT.getScalarSizeInBits()), dl, ExtVT);
|
||||
SDValue Zero = DAG.getConstant(
|
||||
APInt::getNullValue(ExtVT.getScalarSizeInBits()), dl, ExtVT);
|
||||
SDValue V;
|
||||
if (Subtarget.hasDQI()) {
|
||||
V = DAG.getNode(X86ISD::VSEXT, dl, ExtVT, In);
|
||||
assert(!VT.is512BitVector() && "Unexpected vector type");
|
||||
} else {
|
||||
SDValue NegOne = getOnesVector(ExtVT, Subtarget, DAG, dl);
|
||||
SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, dl);
|
||||
V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);
|
||||
if (VT.is512BitVector())
|
||||
return V;
|
||||
}
|
||||
|
||||
SDValue V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);
|
||||
if (VT.is512BitVector())
|
||||
return V;
|
||||
return DAG.getNode(X86ISD::VTRUNC, dl, VT, V);
|
||||
}
|
||||
|
||||
@ -21528,6 +21625,23 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
|
||||
return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7});
|
||||
}
|
||||
|
||||
// It's worth extending once and using the vXi16/vXi32 shifts for smaller
|
||||
// types, but without AVX512 the extra overheads to get from vXi8 to vXi32
|
||||
// make the existing SSE solution better.
|
||||
if ((Subtarget.hasInt256() && VT == MVT::v8i16) ||
|
||||
(Subtarget.hasAVX512() && VT == MVT::v16i16) ||
|
||||
(Subtarget.hasAVX512() && VT == MVT::v16i8) ||
|
||||
(Subtarget.hasBWI() && VT == MVT::v32i8)) {
|
||||
MVT EvtSVT = (VT == MVT::v32i8 ? MVT::i16 : MVT::i32);
|
||||
MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements());
|
||||
unsigned ExtOpc =
|
||||
Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
|
||||
R = DAG.getNode(ExtOpc, dl, ExtVT, R);
|
||||
Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt);
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, VT,
|
||||
DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
|
||||
}
|
||||
|
||||
if (VT == MVT::v16i8 ||
|
||||
(VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP())) {
|
||||
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
|
||||
@ -21636,19 +21750,6 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
|
||||
}
|
||||
}
|
||||
|
||||
// It's worth extending once and using the v8i32 shifts for 16-bit types, but
|
||||
// the extra overheads to get from v16i8 to v8i32 make the existing SSE
|
||||
// solution better.
|
||||
if (Subtarget.hasInt256() && VT == MVT::v8i16) {
|
||||
MVT ExtVT = MVT::v8i32;
|
||||
unsigned ExtOpc =
|
||||
Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
|
||||
R = DAG.getNode(ExtOpc, dl, ExtVT, R);
|
||||
Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt);
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, VT,
|
||||
DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
|
||||
}
|
||||
|
||||
if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {
|
||||
MVT ExtVT = MVT::v8i32;
|
||||
SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
|
||||
@ -27763,29 +27864,32 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// \brief Try to combine a shuffle into a target-specific add-sub node.
|
||||
/// Returns true iff the shuffle node \p N can be replaced with ADDSUB
|
||||
/// operation. If true is returned then the operands of ADDSUB operation
|
||||
/// are written to the parameters \p Opnd0 and \p Opnd1.
|
||||
///
|
||||
/// We combine this directly on the abstract vector shuffle nodes so it is
|
||||
/// easier to generically match. We also insert dummy vector shuffle nodes for
|
||||
/// the operands which explicitly discard the lanes which are unused by this
|
||||
/// operation to try to flow through the rest of the combiner the fact that
|
||||
/// they're unused.
|
||||
static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
SDLoc DL(N);
|
||||
/// We combine shuffle to ADDSUB directly on the abstract vector shuffle nodes
|
||||
/// so it is easier to generically match. We also insert dummy vector shuffle
|
||||
/// nodes for the operands which explicitly discard the lanes which are unused
|
||||
/// by this operation to try to flow through the rest of the combiner
|
||||
/// the fact that they're unused.
|
||||
static bool isAddSub(SDNode *N, const X86Subtarget &Subtarget,
|
||||
SDValue &Opnd0, SDValue &Opnd1) {
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
|
||||
(!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
|
||||
return SDValue();
|
||||
(!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&
|
||||
(!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
|
||||
return false;
|
||||
|
||||
// We only handle target-independent shuffles.
|
||||
// FIXME: It would be easy and harmless to use the target shuffle mask
|
||||
// extraction tool to support more.
|
||||
if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
|
||||
return SDValue();
|
||||
return false;
|
||||
|
||||
ArrayRef<int> OrigMask = cast<ShuffleVectorSDNode>(N)->getMask();
|
||||
SmallVector<int, 8> Mask(OrigMask.begin(), OrigMask.end());
|
||||
SmallVector<int, 16> Mask(OrigMask.begin(), OrigMask.end());
|
||||
|
||||
SDValue V1 = N->getOperand(0);
|
||||
SDValue V2 = N->getOperand(1);
|
||||
@ -27796,27 +27900,57 @@ static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget &Subtarget,
|
||||
ShuffleVectorSDNode::commuteMask(Mask);
|
||||
std::swap(V1, V2);
|
||||
} else if (V1.getOpcode() != ISD::FSUB || V2.getOpcode() != ISD::FADD)
|
||||
return SDValue();
|
||||
return false;
|
||||
|
||||
// If there are other uses of these operations we can't fold them.
|
||||
if (!V1->hasOneUse() || !V2->hasOneUse())
|
||||
return SDValue();
|
||||
return false;
|
||||
|
||||
// Ensure that both operations have the same operands. Note that we can
|
||||
// commute the FADD operands.
|
||||
SDValue LHS = V1->getOperand(0), RHS = V1->getOperand(1);
|
||||
if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) &&
|
||||
(V2->getOperand(0) != RHS || V2->getOperand(1) != LHS))
|
||||
return SDValue();
|
||||
return false;
|
||||
|
||||
// We're looking for blends between FADD and FSUB nodes. We insist on these
|
||||
// nodes being lined up in a specific expected pattern.
|
||||
if (!(isShuffleEquivalent(V1, V2, Mask, {0, 3}) ||
|
||||
isShuffleEquivalent(V1, V2, Mask, {0, 5, 2, 7}) ||
|
||||
isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15})))
|
||||
isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15}) ||
|
||||
isShuffleEquivalent(V1, V2, Mask, {0, 17, 2, 19, 4, 21, 6, 23,
|
||||
8, 25, 10, 27, 12, 29, 14, 31})))
|
||||
return false;
|
||||
|
||||
Opnd0 = LHS;
|
||||
Opnd1 = RHS;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief Try to combine a shuffle into a target-specific add-sub or
|
||||
/// mul-add-sub node.
|
||||
static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
SDValue Opnd0, Opnd1;
|
||||
if (!isAddSub(N, Subtarget, Opnd0, Opnd1))
|
||||
return SDValue();
|
||||
|
||||
return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS);
|
||||
EVT VT = N->getValueType(0);
|
||||
SDLoc DL(N);
|
||||
|
||||
// Try to generate X86ISD::FMADDSUB node here.
|
||||
SDValue Opnd2;
|
||||
if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2))
|
||||
return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
|
||||
|
||||
// Do not generate X86ISD::ADDSUB node for 512-bit types even though
|
||||
// the ADDSUB idiom has been successfully recognized. There are no known
|
||||
// X86 targets with 512-bit ADDSUB instructions!
|
||||
if (VT.is512BitVector())
|
||||
return SDValue();
|
||||
|
||||
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
|
||||
}
|
||||
|
||||
// We are looking for a shuffle where both sources are concatenated with undef
|
||||
@ -27878,7 +28012,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
|
||||
// If we have legalized the vector types, look for blends of FADD and FSUB
|
||||
// nodes that we can fuse into an ADDSUB node.
|
||||
if (TLI.isTypeLegal(VT))
|
||||
if (SDValue AddSub = combineShuffleToAddSub(N, Subtarget, DAG))
|
||||
if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))
|
||||
return AddSub;
|
||||
|
||||
// During Type Legalization, when promoting illegal vector types,
|
||||
|
@ -443,6 +443,22 @@ def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
|
||||
[(set VR512:$dst, (v16i32 immAllOnesV))]>;
|
||||
}
|
||||
|
||||
// Alias instructions that allow VPTERNLOG to be used with a mask to create
|
||||
// a mix of all ones and all zeros elements. This is done this way to force
|
||||
// the same register to be used as input for all three sources.
|
||||
let isPseudo = 1, Predicates = [HasAVX512] in {
|
||||
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
|
||||
(ins VK16WM:$mask), "",
|
||||
[(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
|
||||
(v16i32 immAllOnesV),
|
||||
(v16i32 immAllZerosV)))]>;
|
||||
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
|
||||
(ins VK8WM:$mask), "",
|
||||
[(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
|
||||
(bc_v8i64 (v16i32 immAllOnesV)),
|
||||
(bc_v8i64 (v16i32 immAllZerosV))))]>;
|
||||
}
|
||||
|
||||
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
|
||||
isPseudo = 1, Predicates = [HasVLX], SchedRW = [WriteZero] in {
|
||||
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
|
||||
@ -1064,10 +1080,10 @@ def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
|
||||
(v8f32 VR256X:$src), 1)>;
|
||||
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
|
||||
(VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
|
||||
(v4f64 VR256X:$src), 1)>;
|
||||
(v4f64 VR256X:$src), 1)>;
|
||||
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
|
||||
(VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
|
||||
(v4i64 VR256X:$src), 1)>;
|
||||
(v4i64 VR256X:$src), 1)>;
|
||||
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
|
||||
(VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
|
||||
(v8i32 VR256X:$src), 1)>;
|
||||
@ -1485,8 +1501,7 @@ defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd",
|
||||
// AVX-512 - BLEND using mask
|
||||
//
|
||||
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
|
||||
let ExeDomain = _.ExeDomain in {
|
||||
let hasSideEffects = 0 in
|
||||
let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
|
||||
def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
@ -1496,16 +1511,13 @@ multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
|
||||
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
|
||||
[(set _.RC:$dst, (vselect _.KRCWM:$mask,
|
||||
(_.VT _.RC:$src2),
|
||||
(_.VT _.RC:$src1)))]>, EVEX_4V, EVEX_K;
|
||||
let hasSideEffects = 0 in
|
||||
[]>, EVEX_4V, EVEX_K;
|
||||
def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
|
||||
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
|
||||
[]>, EVEX_4V, EVEX_KZ;
|
||||
let mayLoad = 1, hasSideEffects = 0 in
|
||||
let mayLoad = 1 in {
|
||||
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.MemOp:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
@ -1515,38 +1527,32 @@ multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
|
||||
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
|
||||
[(set _.RC:$dst, (vselect _.KRCWM:$mask,
|
||||
(_.VT (bitconvert (_.LdFrag addr:$src2))),
|
||||
(_.VT _.RC:$src1)))]>,
|
||||
EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>;
|
||||
let mayLoad = 1, hasSideEffects = 0 in
|
||||
[]>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>;
|
||||
def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
|
||||
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
|
||||
[]>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>;
|
||||
}
|
||||
}
|
||||
}
|
||||
multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
|
||||
|
||||
let mayLoad = 1, hasSideEffects = 0 in {
|
||||
def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
|
||||
(ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
|
||||
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
|
||||
[(set _.RC:$dst,(vselect _.KRCWM:$mask,
|
||||
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
|
||||
(_.VT _.RC:$src1)))]>,
|
||||
EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
|
||||
[]>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
|
||||
|
||||
let mayLoad = 1, hasSideEffects = 0 in
|
||||
def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.ScalarMemOp:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
|
||||
"$dst, $src1, ${src2}", _.BroadcastStr, "}"),
|
||||
[]>, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
multiclass blendmask_dq <bits<8> opc, string OpcodeStr,
|
||||
@ -1582,21 +1588,6 @@ defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", avx512vl_i8_info>;
|
||||
defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", avx512vl_i16_info>, VEX_W;
|
||||
|
||||
|
||||
let Predicates = [HasAVX512, NoVLX] in {
|
||||
def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
|
||||
(v8f32 VR256X:$src2))),
|
||||
(EXTRACT_SUBREG
|
||||
(v16f32 (VBLENDMPSZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
|
||||
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
|
||||
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>;
|
||||
|
||||
def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
|
||||
(v8i32 VR256X:$src2))),
|
||||
(EXTRACT_SUBREG
|
||||
(v16i32 (VPBLENDMDZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>;
|
||||
}
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Compare Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -2735,7 +2726,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
||||
(ins _.KRCWM:$mask, _.RC:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
|
||||
"${dst} {${mask}} {z}, $src}"),
|
||||
[(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
|
||||
[(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
|
||||
(_.VT _.RC:$src),
|
||||
_.ImmAllZerosV)))], _.ExeDomain>,
|
||||
EVEX, EVEX_KZ;
|
||||
@ -2972,6 +2963,30 @@ def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
|
||||
(v16i32 VR512:$src))),
|
||||
(VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
|
||||
|
||||
// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
|
||||
// available. Use a 512-bit operation and extract.
|
||||
let Predicates = [HasAVX512, NoVLX] in {
|
||||
def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
|
||||
(v8f32 VR256X:$src0))),
|
||||
(EXTRACT_SUBREG
|
||||
(v16f32
|
||||
(VMOVAPSZrrk
|
||||
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
|
||||
(COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
|
||||
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
|
||||
sub_ymm)>;
|
||||
|
||||
def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
|
||||
(v8i32 VR256X:$src0))),
|
||||
(EXTRACT_SUBREG
|
||||
(v16i32
|
||||
(VMOVDQA32Zrrk
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
|
||||
(COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
|
||||
sub_ymm)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasVLX, NoBWI] in {
|
||||
// 128-bit load/store without BWI.
|
||||
def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
|
||||
@ -3116,13 +3131,13 @@ let Predicates = [HasVLX] in {
|
||||
(VMOVDQU32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
|
||||
}
|
||||
|
||||
|
||||
// Move Int Doubleword to Packed Double Int
|
||||
//
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128X:$dst,
|
||||
|
||||
// Move Int Doubleword to Packed Double Int
|
||||
//
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128X:$dst,
|
||||
(v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
|
||||
EVEX;
|
||||
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
|
||||
@ -3152,47 +3167,47 @@ def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src
|
||||
def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
|
||||
"vmovq\t{$src, $dst|$dst, $src}",
|
||||
[(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
|
||||
IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
|
||||
EVEX_CD8<64, CD8VT1>;
|
||||
}
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
// Move Int Doubleword to Single Scalar
|
||||
//
|
||||
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
|
||||
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(set FR32X:$dst, (bitconvert GR32:$src))],
|
||||
IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
|
||||
EVEX_CD8<64, CD8VT1>;
|
||||
}
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
// Move Int Doubleword to Single Scalar
|
||||
//
|
||||
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
|
||||
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(set FR32X:$dst, (bitconvert GR32:$src))],
|
||||
IIC_SSE_MOVDQ>, EVEX;
|
||||
|
||||
def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
|
||||
IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
|
||||
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
|
||||
|
||||
// Move doubleword from xmm register to r/m32
|
||||
//
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
|
||||
IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
|
||||
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
|
||||
|
||||
// Move doubleword from xmm register to r/m32
|
||||
//
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
|
||||
(iPTR 0)))], IIC_SSE_MOVD_ToGP>,
|
||||
EVEX;
|
||||
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
|
||||
(ins i32mem:$dst, VR128X:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(store (i32 (extractelt (v4i32 VR128X:$src),
|
||||
(iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
|
||||
EVEX, EVEX_CD8<32, CD8VT1>;
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
// Move quadword from xmm1 register to r/m64
|
||||
//
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
|
||||
"vmovq\t{$src, $dst|$dst, $src}",
|
||||
[(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
|
||||
[(store (i32 (extractelt (v4i32 VR128X:$src),
|
||||
(iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
|
||||
EVEX, EVEX_CD8<32, CD8VT1>;
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
// Move quadword from xmm1 register to r/m64
|
||||
//
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
|
||||
"vmovq\t{$src, $dst|$dst, $src}",
|
||||
[(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
|
||||
(iPTR 0)))],
|
||||
IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
|
||||
Requires<[HasAVX512, In64BitMode]>;
|
||||
@ -3213,39 +3228,39 @@ def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
|
||||
|
||||
let hasSideEffects = 0 in
|
||||
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
|
||||
(ins VR128X:$src),
|
||||
"vmovq.s\t{$src, $dst|$dst, $src}",[]>,
|
||||
EVEX, VEX_W;
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
// Move Scalar Single to Double Int
|
||||
//
|
||||
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
|
||||
def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
|
||||
(ins FR32X:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
(ins VR128X:$src),
|
||||
"vmovq.s\t{$src, $dst|$dst, $src}",[]>,
|
||||
EVEX, VEX_W;
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
// Move Scalar Single to Double Int
|
||||
//
|
||||
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
|
||||
def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
|
||||
(ins FR32X:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(set GR32:$dst, (bitconvert FR32X:$src))],
|
||||
IIC_SSE_MOVD_ToGP>, EVEX;
|
||||
def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
|
||||
(ins i32mem:$dst, FR32X:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
|
||||
IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
|
||||
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
|
||||
|
||||
// Move Quadword Int to Packed Quadword Int
|
||||
//
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
|
||||
(ins i64mem:$src),
|
||||
"vmovq\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128X:$dst,
|
||||
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
|
||||
EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 MOVSS, MOVSD
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
|
||||
IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
|
||||
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
|
||||
|
||||
// Move Quadword Int to Packed Quadword Int
|
||||
//
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
|
||||
(ins i64mem:$src),
|
||||
"vmovq\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128X:$dst,
|
||||
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
|
||||
EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 MOVSS, MOVSD
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
multiclass avx512_move_scalar<string asm, SDNode OpNode,
|
||||
@ -8646,6 +8661,28 @@ def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
|
||||
(VMOVDDUPZ128rm addr:$src)>;
|
||||
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
|
||||
(VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
|
||||
|
||||
def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
|
||||
(v2f64 VR128X:$src0)),
|
||||
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
|
||||
def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
|
||||
(bitconvert (v4i32 immAllZerosV))),
|
||||
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
|
||||
|
||||
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
|
||||
(v2f64 VR128X:$src0)),
|
||||
(VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
|
||||
(COPY_TO_REGCLASS FR64X:$src, VR128X))>;
|
||||
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
|
||||
(bitconvert (v4i32 immAllZerosV))),
|
||||
(VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
|
||||
|
||||
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
|
||||
(v2f64 VR128X:$src0)),
|
||||
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
|
||||
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
|
||||
(bitconvert (v4i32 immAllZerosV))),
|
||||
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -543,7 +543,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
||||
{ X86::MOV8rr, X86::MOV8rm, 0 },
|
||||
{ X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 },
|
||||
{ X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 },
|
||||
{ X86::MOVDDUPrr, X86::MOVDDUPrm, 0 },
|
||||
{ X86::MOVDDUPrr, X86::MOVDDUPrm, TB_NO_REVERSE },
|
||||
{ X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
|
||||
{ X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
|
||||
{ X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 },
|
||||
@ -661,7 +661,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
||||
{ X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
|
||||
{ X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 },
|
||||
{ X86::VMOVAPSrr, X86::VMOVAPSrm, TB_ALIGN_16 },
|
||||
{ X86::VMOVDDUPrr, X86::VMOVDDUPrm, 0 },
|
||||
{ X86::VMOVDDUPrr, X86::VMOVDDUPrm, TB_NO_REVERSE },
|
||||
{ X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
|
||||
{ X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 },
|
||||
{ X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 },
|
||||
@ -6864,6 +6864,21 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
|
||||
.addReg(Reg, RegState::Undef).addImm(0xff);
|
||||
return true;
|
||||
}
|
||||
case X86::AVX512_512_SEXT_MASK_32:
|
||||
case X86::AVX512_512_SEXT_MASK_64: {
|
||||
unsigned Reg = MIB->getOperand(0).getReg();
|
||||
unsigned MaskReg = MIB->getOperand(1).getReg();
|
||||
unsigned MaskState = getRegState(MIB->getOperand(1));
|
||||
unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ?
|
||||
X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz;
|
||||
MI.RemoveOperand(1);
|
||||
MIB->setDesc(get(Opc));
|
||||
// VPTERNLOG needs 3 register inputs and an immediate.
|
||||
// 0xff will return 1s for any input.
|
||||
MIB.addReg(Reg, RegState::Undef).addReg(MaskReg, MaskState)
|
||||
.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xff);
|
||||
return true;
|
||||
}
|
||||
case X86::VMOVAPSZ128rm_NOVLX:
|
||||
return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm),
|
||||
get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
|
||||
|
@ -6397,7 +6397,7 @@ let Predicates = [HasAVX] in {
|
||||
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround",
|
||||
int_x86_sse41_round_ss,
|
||||
int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
|
||||
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG;
|
||||
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG;
|
||||
}
|
||||
|
||||
let Predicates = [UseAVX] in {
|
||||
|
@ -144,6 +144,10 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
}
|
||||
|
||||
static const CostTblEntry AVX512BWUniformConstCostTable[] = {
|
||||
{ ISD::SHL, MVT::v64i8, 2 }, // psllw + pand.
|
||||
{ ISD::SRL, MVT::v64i8, 2 }, // psrlw + pand.
|
||||
{ ISD::SRA, MVT::v64i8, 4 }, // psrlw, pand, pxor, psubb.
|
||||
|
||||
{ ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence
|
||||
{ ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence
|
||||
};
|
||||
@ -168,6 +172,10 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
}
|
||||
|
||||
static const CostTblEntry AVX2UniformConstCostTable[] = {
|
||||
{ ISD::SHL, MVT::v32i8, 2 }, // psllw + pand.
|
||||
{ ISD::SRL, MVT::v32i8, 2 }, // psrlw + pand.
|
||||
{ ISD::SRA, MVT::v32i8, 4 }, // psrlw, pand, pxor, psubb.
|
||||
|
||||
{ ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle.
|
||||
|
||||
{ ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
|
||||
@ -184,6 +192,14 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
}
|
||||
|
||||
static const CostTblEntry SSE2UniformConstCostTable[] = {
|
||||
{ ISD::SHL, MVT::v16i8, 2 }, // psllw + pand.
|
||||
{ ISD::SRL, MVT::v16i8, 2 }, // psrlw + pand.
|
||||
{ ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
|
||||
|
||||
{ ISD::SHL, MVT::v32i8, 4 }, // 2*(psllw + pand).
|
||||
{ ISD::SRL, MVT::v32i8, 4 }, // 2*(psrlw + pand).
|
||||
{ ISD::SRA, MVT::v32i8, 8 }, // 2*(psrlw, pand, pxor, psubb).
|
||||
|
||||
{ ISD::SDIV, MVT::v16i16, 12 }, // pmulhw sequence
|
||||
{ ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
|
||||
{ ISD::UDIV, MVT::v16i16, 12 }, // pmulhuw sequence
|
||||
@ -207,6 +223,43 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry AVX2UniformCostTable[] = {
|
||||
// Uniform splats are cheaper for the following instructions.
|
||||
{ ISD::SHL, MVT::v16i16, 1 }, // psllw.
|
||||
{ ISD::SRL, MVT::v16i16, 1 }, // psrlw.
|
||||
{ ISD::SRA, MVT::v16i16, 1 }, // psraw.
|
||||
};
|
||||
|
||||
if (ST->hasAVX2() &&
|
||||
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
|
||||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
|
||||
if (const auto *Entry =
|
||||
CostTableLookup(AVX2UniformCostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry SSE2UniformCostTable[] = {
|
||||
// Uniform splats are cheaper for the following instructions.
|
||||
{ ISD::SHL, MVT::v8i16, 1 }, // psllw.
|
||||
{ ISD::SHL, MVT::v4i32, 1 }, // pslld
|
||||
{ ISD::SHL, MVT::v2i64, 1 }, // psllq.
|
||||
|
||||
{ ISD::SRL, MVT::v8i16, 1 }, // psrlw.
|
||||
{ ISD::SRL, MVT::v4i32, 1 }, // psrld.
|
||||
{ ISD::SRL, MVT::v2i64, 1 }, // psrlq.
|
||||
|
||||
{ ISD::SRA, MVT::v8i16, 1 }, // psraw.
|
||||
{ ISD::SRA, MVT::v4i32, 1 }, // psrad.
|
||||
};
|
||||
|
||||
if (ST->hasSSE2() &&
|
||||
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
|
||||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
|
||||
if (const auto *Entry =
|
||||
CostTableLookup(SSE2UniformCostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry AVX512DQCostTable[] = {
|
||||
{ ISD::MUL, MVT::v2i64, 1 },
|
||||
{ ISD::MUL, MVT::v4i64, 1 },
|
||||
@ -219,6 +272,10 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry AVX512BWCostTable[] = {
|
||||
{ ISD::SHL, MVT::v32i16, 1 }, // vpsllvw
|
||||
{ ISD::SRL, MVT::v32i16, 1 }, // vpsrlvw
|
||||
{ ISD::SRA, MVT::v32i16, 1 }, // vpsravw
|
||||
|
||||
{ ISD::MUL, MVT::v64i8, 11 }, // extend/pmullw/trunc sequence.
|
||||
{ ISD::MUL, MVT::v32i8, 4 }, // extend/pmullw/trunc sequence.
|
||||
{ ISD::MUL, MVT::v16i8, 4 }, // extend/pmullw/trunc sequence.
|
||||
@ -259,7 +316,7 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
if (const auto *Entry = CostTableLookup(AVX512CostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry AVX2CostTable[] = {
|
||||
static const CostTblEntry AVX2ShiftCostTable[] = {
|
||||
// Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
|
||||
// customize them to detect the cases where shift amount is a scalar one.
|
||||
{ ISD::SHL, MVT::v4i32, 1 },
|
||||
@ -283,11 +340,11 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
// is lowered into a vector multiply (vpmullw).
|
||||
return LT.first;
|
||||
|
||||
if (const auto *Entry = CostTableLookup(AVX2CostTable, ISD, LT.second))
|
||||
if (const auto *Entry = CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry XOPCostTable[] = {
|
||||
static const CostTblEntry XOPShiftCostTable[] = {
|
||||
// 128bit shifts take 1cy, but right shifts require negation beforehand.
|
||||
{ ISD::SHL, MVT::v16i8, 1 },
|
||||
{ ISD::SRL, MVT::v16i8, 2 },
|
||||
@ -318,93 +375,20 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
|
||||
// Look for XOP lowering tricks.
|
||||
if (ST->hasXOP())
|
||||
if (const auto *Entry = CostTableLookup(XOPCostTable, ISD, LT.second))
|
||||
if (const auto *Entry = CostTableLookup(XOPShiftCostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry AVX2CustomCostTable[] = {
|
||||
{ ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence.
|
||||
{ ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
|
||||
|
||||
{ ISD::SRL, MVT::v32i8, 11 }, // vpblendvb sequence.
|
||||
{ ISD::SRL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
|
||||
|
||||
{ ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence.
|
||||
{ ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence.
|
||||
{ ISD::SRA, MVT::v2i64, 4 }, // srl/xor/sub sequence.
|
||||
{ ISD::SRA, MVT::v4i64, 4 }, // srl/xor/sub sequence.
|
||||
|
||||
{ ISD::MUL, MVT::v32i8, 17 }, // extend/pmullw/trunc sequence.
|
||||
{ ISD::MUL, MVT::v16i8, 7 }, // extend/pmullw/trunc sequence.
|
||||
{ ISD::MUL, MVT::v8i32, 1 }, // pmulld
|
||||
{ ISD::MUL, MVT::v4i64, 8 }, // 3*pmuludq/3*shift/2*add
|
||||
|
||||
{ ISD::FDIV, MVT::f32, 7 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::f64, 14 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v2f64, 14 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/
|
||||
};
|
||||
|
||||
// Look for AVX2 lowering tricks for custom cases.
|
||||
if (ST->hasAVX2())
|
||||
if (const auto *Entry = CostTableLookup(AVX2CustomCostTable, ISD,
|
||||
LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry AVXCustomCostTable[] = {
|
||||
{ ISD::MUL, MVT::v32i8, 26 }, // extend/pmullw/trunc sequence.
|
||||
|
||||
{ ISD::FDIV, MVT::f32, 14 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f32, 14 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v8f32, 28 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::f64, 22 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v2f64, 22 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f64, 44 }, // SNB from http://www.agner.org/
|
||||
|
||||
// Vectorizing division is a bad idea. See the SSE2 table for more comments.
|
||||
{ ISD::SDIV, MVT::v32i8, 32*20 },
|
||||
{ ISD::SDIV, MVT::v16i16, 16*20 },
|
||||
{ ISD::SDIV, MVT::v8i32, 8*20 },
|
||||
{ ISD::SDIV, MVT::v4i64, 4*20 },
|
||||
{ ISD::UDIV, MVT::v32i8, 32*20 },
|
||||
{ ISD::UDIV, MVT::v16i16, 16*20 },
|
||||
{ ISD::UDIV, MVT::v8i32, 8*20 },
|
||||
{ ISD::UDIV, MVT::v4i64, 4*20 },
|
||||
};
|
||||
|
||||
// Look for AVX2 lowering tricks for custom cases.
|
||||
if (ST->hasAVX())
|
||||
if (const auto *Entry = CostTableLookup(AVXCustomCostTable, ISD,
|
||||
LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry
|
||||
SSE2UniformCostTable[] = {
|
||||
static const CostTblEntry SSE2UniformShiftCostTable[] = {
|
||||
// Uniform splats are cheaper for the following instructions.
|
||||
{ ISD::SHL, MVT::v16i8, 1 }, // psllw.
|
||||
{ ISD::SHL, MVT::v32i8, 2 }, // psllw.
|
||||
{ ISD::SHL, MVT::v8i16, 1 }, // psllw.
|
||||
{ ISD::SHL, MVT::v16i16, 2 }, // psllw.
|
||||
{ ISD::SHL, MVT::v4i32, 1 }, // pslld
|
||||
{ ISD::SHL, MVT::v8i32, 2 }, // pslld
|
||||
{ ISD::SHL, MVT::v2i64, 1 }, // psllq.
|
||||
{ ISD::SHL, MVT::v4i64, 2 }, // psllq.
|
||||
|
||||
{ ISD::SRL, MVT::v16i8, 1 }, // psrlw.
|
||||
{ ISD::SRL, MVT::v32i8, 2 }, // psrlw.
|
||||
{ ISD::SRL, MVT::v8i16, 1 }, // psrlw.
|
||||
{ ISD::SRL, MVT::v16i16, 2 }, // psrlw.
|
||||
{ ISD::SRL, MVT::v4i32, 1 }, // psrld.
|
||||
{ ISD::SRL, MVT::v8i32, 2 }, // psrld.
|
||||
{ ISD::SRL, MVT::v2i64, 1 }, // psrlq.
|
||||
{ ISD::SRL, MVT::v4i64, 2 }, // psrlq.
|
||||
|
||||
{ ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
|
||||
{ ISD::SRA, MVT::v32i8, 8 }, // psrlw, pand, pxor, psubb.
|
||||
{ ISD::SRA, MVT::v8i16, 1 }, // psraw.
|
||||
{ ISD::SRA, MVT::v16i16, 2 }, // psraw.
|
||||
{ ISD::SRA, MVT::v4i32, 1 }, // psrad.
|
||||
{ ISD::SRA, MVT::v8i32, 2 }, // psrad.
|
||||
{ ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle.
|
||||
{ ISD::SRA, MVT::v4i64, 8 }, // 2 x psrad + shuffle.
|
||||
@ -414,7 +398,7 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
|
||||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
|
||||
if (const auto *Entry =
|
||||
CostTableLookup(SSE2UniformCostTable, ISD, LT.second))
|
||||
CostTableLookup(SSE2UniformShiftCostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
@ -422,24 +406,98 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
|
||||
MVT VT = LT.second;
|
||||
// Vector shift left by non uniform constant can be lowered
|
||||
// into vector multiply (pmullw/pmulld).
|
||||
if ((VT == MVT::v8i16 && ST->hasSSE2()) ||
|
||||
(VT == MVT::v4i32 && ST->hasSSE41()))
|
||||
return LT.first;
|
||||
|
||||
// v16i16 and v8i32 shifts by non-uniform constants are lowered into a
|
||||
// sequence of extract + two vector multiply + insert.
|
||||
if ((VT == MVT::v8i32 || VT == MVT::v16i16) &&
|
||||
(ST->hasAVX() && !ST->hasAVX2()))
|
||||
ISD = ISD::MUL;
|
||||
|
||||
// A vector shift left by non uniform constant is converted
|
||||
// into a vector multiply; the new multiply is eventually
|
||||
// lowered into a sequence of shuffles and 2 x pmuludq.
|
||||
if (VT == MVT::v4i32 && ST->hasSSE2())
|
||||
// into vector multiply.
|
||||
if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->hasSSE2()) ||
|
||||
((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->hasAVX()))
|
||||
ISD = ISD::MUL;
|
||||
}
|
||||
|
||||
static const CostTblEntry AVX2CostTable[] = {
|
||||
{ ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence.
|
||||
{ ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
|
||||
|
||||
{ ISD::SRL, MVT::v32i8, 11 }, // vpblendvb sequence.
|
||||
{ ISD::SRL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
|
||||
|
||||
{ ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence.
|
||||
{ ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence.
|
||||
{ ISD::SRA, MVT::v2i64, 4 }, // srl/xor/sub sequence.
|
||||
{ ISD::SRA, MVT::v4i64, 4 }, // srl/xor/sub sequence.
|
||||
|
||||
{ ISD::SUB, MVT::v32i8, 1 }, // psubb
|
||||
{ ISD::ADD, MVT::v32i8, 1 }, // paddb
|
||||
{ ISD::SUB, MVT::v16i16, 1 }, // psubw
|
||||
{ ISD::ADD, MVT::v16i16, 1 }, // paddw
|
||||
{ ISD::SUB, MVT::v8i32, 1 }, // psubd
|
||||
{ ISD::ADD, MVT::v8i32, 1 }, // paddd
|
||||
{ ISD::SUB, MVT::v4i64, 1 }, // psubq
|
||||
{ ISD::ADD, MVT::v4i64, 1 }, // paddq
|
||||
|
||||
{ ISD::MUL, MVT::v32i8, 17 }, // extend/pmullw/trunc sequence.
|
||||
{ ISD::MUL, MVT::v16i8, 7 }, // extend/pmullw/trunc sequence.
|
||||
{ ISD::MUL, MVT::v16i16, 1 }, // pmullw
|
||||
{ ISD::MUL, MVT::v8i32, 1 }, // pmulld
|
||||
{ ISD::MUL, MVT::v4i64, 8 }, // 3*pmuludq/3*shift/2*add
|
||||
|
||||
{ ISD::FDIV, MVT::f32, 7 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::f64, 14 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v2f64, 14 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/
|
||||
};
|
||||
|
||||
// Look for AVX2 lowering tricks for custom cases.
|
||||
if (ST->hasAVX2())
|
||||
if (const auto *Entry = CostTableLookup(AVX2CostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry AVX1CostTable[] = {
|
||||
// We don't have to scalarize unsupported ops. We can issue two half-sized
|
||||
// operations and we only need to extract the upper YMM half.
|
||||
// Two ops + 1 extract + 1 insert = 4.
|
||||
{ ISD::MUL, MVT::v16i16, 4 },
|
||||
{ ISD::MUL, MVT::v8i32, 4 },
|
||||
{ ISD::SUB, MVT::v32i8, 4 },
|
||||
{ ISD::ADD, MVT::v32i8, 4 },
|
||||
{ ISD::SUB, MVT::v16i16, 4 },
|
||||
{ ISD::ADD, MVT::v16i16, 4 },
|
||||
{ ISD::SUB, MVT::v8i32, 4 },
|
||||
{ ISD::ADD, MVT::v8i32, 4 },
|
||||
{ ISD::SUB, MVT::v4i64, 4 },
|
||||
{ ISD::ADD, MVT::v4i64, 4 },
|
||||
|
||||
// A v4i64 multiply is custom lowered as two split v2i64 vectors that then
|
||||
// are lowered as a series of long multiplies(3), shifts(3) and adds(2)
|
||||
// Because we believe v4i64 to be a legal type, we must also include the
|
||||
// extract+insert in the cost table. Therefore, the cost here is 18
|
||||
// instead of 8.
|
||||
{ ISD::MUL, MVT::v4i64, 18 },
|
||||
|
||||
{ ISD::MUL, MVT::v32i8, 26 }, // extend/pmullw/trunc sequence.
|
||||
|
||||
{ ISD::FDIV, MVT::f32, 14 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f32, 14 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v8f32, 28 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::f64, 22 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v2f64, 22 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f64, 44 }, // SNB from http://www.agner.org/
|
||||
|
||||
// Vectorizing division is a bad idea. See the SSE2 table for more comments.
|
||||
{ ISD::SDIV, MVT::v32i8, 32*20 },
|
||||
{ ISD::SDIV, MVT::v16i16, 16*20 },
|
||||
{ ISD::SDIV, MVT::v8i32, 8*20 },
|
||||
{ ISD::SDIV, MVT::v4i64, 4*20 },
|
||||
{ ISD::UDIV, MVT::v32i8, 32*20 },
|
||||
{ ISD::UDIV, MVT::v16i16, 16*20 },
|
||||
{ ISD::UDIV, MVT::v8i32, 8*20 },
|
||||
{ ISD::UDIV, MVT::v4i64, 4*20 },
|
||||
};
|
||||
|
||||
if (ST->hasAVX())
|
||||
if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry SSE42CostTable[] = {
|
||||
{ ISD::FDIV, MVT::f32, 14 }, // Nehalem from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f32, 14 }, // Nehalem from http://www.agner.org/
|
||||
@ -456,6 +514,8 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
{ ISD::SHL, MVT::v32i8, 2*11 }, // pblendvb sequence.
|
||||
{ ISD::SHL, MVT::v8i16, 14 }, // pblendvb sequence.
|
||||
{ ISD::SHL, MVT::v16i16, 2*14 }, // pblendvb sequence.
|
||||
{ ISD::SHL, MVT::v4i32, 4 }, // pslld/paddd/cvttps2dq/pmulld
|
||||
{ ISD::SHL, MVT::v8i32, 2*4 }, // pslld/paddd/cvttps2dq/pmulld
|
||||
|
||||
{ ISD::SRL, MVT::v16i8, 12 }, // pblendvb sequence.
|
||||
{ ISD::SRL, MVT::v32i8, 2*12 }, // pblendvb sequence.
|
||||
@ -501,6 +561,7 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
{ ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence.
|
||||
|
||||
{ ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence.
|
||||
{ ISD::MUL, MVT::v8i16, 1 }, // pmullw
|
||||
{ ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle
|
||||
{ ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add
|
||||
|
||||
@ -516,46 +577,19 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
// generally a bad idea. Assume somewhat arbitrarily that we have to be able
|
||||
// to hide "20 cycles" for each lane.
|
||||
{ ISD::SDIV, MVT::v16i8, 16*20 },
|
||||
{ ISD::SDIV, MVT::v8i16, 8*20 },
|
||||
{ ISD::SDIV, MVT::v4i32, 4*20 },
|
||||
{ ISD::SDIV, MVT::v2i64, 2*20 },
|
||||
{ ISD::SDIV, MVT::v8i16, 8*20 },
|
||||
{ ISD::SDIV, MVT::v4i32, 4*20 },
|
||||
{ ISD::SDIV, MVT::v2i64, 2*20 },
|
||||
{ ISD::UDIV, MVT::v16i8, 16*20 },
|
||||
{ ISD::UDIV, MVT::v8i16, 8*20 },
|
||||
{ ISD::UDIV, MVT::v4i32, 4*20 },
|
||||
{ ISD::UDIV, MVT::v2i64, 2*20 },
|
||||
{ ISD::UDIV, MVT::v8i16, 8*20 },
|
||||
{ ISD::UDIV, MVT::v4i32, 4*20 },
|
||||
{ ISD::UDIV, MVT::v2i64, 2*20 },
|
||||
};
|
||||
|
||||
if (ST->hasSSE2())
|
||||
if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry AVX1CostTable[] = {
|
||||
// We don't have to scalarize unsupported ops. We can issue two half-sized
|
||||
// operations and we only need to extract the upper YMM half.
|
||||
// Two ops + 1 extract + 1 insert = 4.
|
||||
{ ISD::MUL, MVT::v16i16, 4 },
|
||||
{ ISD::MUL, MVT::v8i32, 4 },
|
||||
{ ISD::SUB, MVT::v32i8, 4 },
|
||||
{ ISD::ADD, MVT::v32i8, 4 },
|
||||
{ ISD::SUB, MVT::v16i16, 4 },
|
||||
{ ISD::ADD, MVT::v16i16, 4 },
|
||||
{ ISD::SUB, MVT::v8i32, 4 },
|
||||
{ ISD::ADD, MVT::v8i32, 4 },
|
||||
{ ISD::SUB, MVT::v4i64, 4 },
|
||||
{ ISD::ADD, MVT::v4i64, 4 },
|
||||
// A v4i64 multiply is custom lowered as two split v2i64 vectors that then
|
||||
// are lowered as a series of long multiplies(3), shifts(3) and adds(2)
|
||||
// Because we believe v4i64 to be a legal type, we must also include the
|
||||
// extract+insert in the cost table. Therefore, the cost here is 18
|
||||
// instead of 8.
|
||||
{ ISD::MUL, MVT::v4i64, 18 },
|
||||
};
|
||||
|
||||
// Look for AVX1 lowering tricks.
|
||||
if (ST->hasAVX() && !ST->hasAVX2())
|
||||
if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry SSE1CostTable[] = {
|
||||
{ ISD::FDIV, MVT::f32, 17 }, // Pentium III from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/
|
||||
@ -639,8 +673,7 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
|
||||
|
||||
{ TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
|
||||
{ TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
|
||||
{ TTI::SK_Reverse, MVT::v64i8, 6 }, // vextracti64x4 + 2*vperm2i128
|
||||
// + 2*pshufb + vinserti64x4
|
||||
{ TTI::SK_Reverse, MVT::v64i8, 2 }, // pshufb + vshufi64x2
|
||||
|
||||
{ TTI::SK_PermuteSingleSrc, MVT::v32i16, 1 }, // vpermw
|
||||
{ TTI::SK_PermuteSingleSrc, MVT::v16i16, 1 }, // vpermw
|
||||
|
@ -42,6 +42,8 @@
using namespace llvm;
using namespace lowertypetests;

using SummaryAction = LowerTypeTestsSummaryAction;

#define DEBUG_TYPE "lowertypetests"

STATISTIC(ByteArraySizeBits, "Byte array size in bits");
@ -55,9 +57,15 @@ static cl::opt<bool> AvoidReuse(
cl::desc("Try to avoid reuse of byte array addresses using aliases"),
cl::Hidden, cl::init(true));

static cl::opt<std::string> ClSummaryAction(
static cl::opt<SummaryAction> ClSummaryAction(
"lowertypetests-summary-action",
cl::desc("What to do with the summary when running this pass"), cl::Hidden);
cl::desc("What to do with the summary when running this pass"),
cl::values(clEnumValN(SummaryAction::None, "none", "Do nothing"),
clEnumValN(SummaryAction::Import, "import",
"Import typeid resolutions from summary and globals"),
clEnumValN(SummaryAction::Export, "export",
"Export typeid resolutions to summary and globals")),
cl::Hidden);

static cl::opt<std::string> ClReadSummary(
"lowertypetests-read-summary",
@ -226,8 +234,8 @@ class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> {
class LowerTypeTestsModule {
Module &M;

// This is for testing purposes only.
std::unique_ptr<ModuleSummaryIndex> OwnedSummary;
SummaryAction Action;
ModuleSummaryIndex *Summary;

bool LinkerSubsectionsViaSymbols;
Triple::ArchType Arch;
@ -319,21 +327,38 @@ class LowerTypeTestsModule {
void createJumpTable(Function *F, ArrayRef<GlobalTypeMember *> Functions);

public:
LowerTypeTestsModule(Module &M);
~LowerTypeTestsModule();
LowerTypeTestsModule(Module &M, SummaryAction Action,
ModuleSummaryIndex *Summary);
bool lower();

// Lower the module using the action and summary passed as command line
// arguments. For testing purposes only.
static bool runForTesting(Module &M);
};

struct LowerTypeTests : public ModulePass {
static char ID;
LowerTypeTests() : ModulePass(ID) {

bool UseCommandLine = false;

SummaryAction Action;
ModuleSummaryIndex *Summary;

LowerTypeTests() : ModulePass(ID), UseCommandLine(true) {
initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
}

LowerTypeTests(SummaryAction Action, ModuleSummaryIndex *Summary)
: ModulePass(ID), Action(Action), Summary(Summary) {
initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
}

bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
return LowerTypeTestsModule(M).lower();
if (UseCommandLine)
return LowerTypeTestsModule::runForTesting(M);
return LowerTypeTestsModule(M, Action, Summary).lower();
}
};

@ -343,7 +368,10 @@ INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false,
false)
char LowerTypeTests::ID = 0;

ModulePass *llvm::createLowerTypeTestsPass() { return new LowerTypeTests; }
ModulePass *llvm::createLowerTypeTestsPass(SummaryAction Action,
ModuleSummaryIndex *Summary) {
return new LowerTypeTests(Action, Summary);
}

/// Build a bit set for TypeId using the object layouts in
/// GlobalLayout.
@ -1145,22 +1173,12 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
}

/// Lower all type tests in this module.
LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) {
// Handle the command-line summary arguments. This code is for testing
// purposes only, so we handle errors directly.
if (!ClSummaryAction.empty()) {
OwnedSummary = make_unique<ModuleSummaryIndex>();
if (!ClReadSummary.empty()) {
ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary +
": ");
auto ReadSummaryFile =
ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));

yaml::Input In(ReadSummaryFile->getBuffer());
In >> *OwnedSummary;
ExitOnErr(errorCodeToError(In.error()));
}
}
LowerTypeTestsModule::LowerTypeTestsModule(Module &M, SummaryAction Action,
ModuleSummaryIndex *Summary)
: M(M), Action(Action), Summary(Summary) {
// FIXME: Use these fields.
(void)this->Action;
(void)this->Summary;

Triple TargetTriple(M.getTargetTriple());
LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
@ -1169,18 +1187,36 @@ LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) {
ObjectFormat = TargetTriple.getObjectFormat();
}

LowerTypeTestsModule::~LowerTypeTestsModule() {
if (ClSummaryAction.empty() || ClWriteSummary.empty())
return;
bool LowerTypeTestsModule::runForTesting(Module &M) {
ModuleSummaryIndex Summary;

ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
": ");
std::error_code EC;
raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
ExitOnErr(errorCodeToError(EC));
// Handle the command-line summary arguments. This code is for testing
// purposes only, so we handle errors directly.
if (!ClReadSummary.empty()) {
ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary +
": ");
auto ReadSummaryFile =
ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));

yaml::Output Out(OS);
Out << *OwnedSummary;
yaml::Input In(ReadSummaryFile->getBuffer());
In >> Summary;
ExitOnErr(errorCodeToError(In.error()));
}

bool Changed = LowerTypeTestsModule(M, ClSummaryAction, &Summary).lower();

if (!ClWriteSummary.empty()) {
ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
": ");
std::error_code EC;
raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
ExitOnErr(errorCodeToError(EC));

yaml::Output Out(OS);
Out << Summary;
}

return Changed;
}

bool LowerTypeTestsModule::lower() {
@ -1313,7 +1349,8 @@ bool LowerTypeTestsModule::lower() {

PreservedAnalyses LowerTypeTestsPass::run(Module &M,
ModuleAnalysisManager &AM) {
bool Changed = LowerTypeTestsModule(M).lower();
bool Changed =
LowerTypeTestsModule(M, SummaryAction::None, /*Summary=*/nullptr).lower();
if (!Changed)
return PreservedAnalyses::all();
return PreservedAnalyses::none();

@ -857,7 +857,8 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
// Lower type metadata and the type.test intrinsic. This pass supports Clang's
// control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at
// link time if CFI is enabled. The pass does nothing if CFI is disabled.
PM.add(createLowerTypeTestsPass());
PM.add(createLowerTypeTestsPass(LowerTypeTestsSummaryAction::None,
/*Summary=*/nullptr));

if (OptLevel != 0)
addLateLTOOptimizationPasses(PM);

@ -1903,7 +1903,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
return foldICmpShlOne(Cmp, Shl, C);

// Check that the shift amount is in range. If not, don't perform undefined
// shifts. When the shift is visited it will be simplified.
// shifts. When the shift is visited, it will be simplified.
unsigned TypeBits = C->getBitWidth();
if (ShiftAmt->uge(TypeBits))
return nullptr;
@ -1923,7 +1923,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
return new ICmpInst(Pred, X, LShrC);

if (Shl->hasOneUse()) {
// Otherwise strength reduce the shift into an and.
// Otherwise, strength reduce the shift into an and.
Constant *Mask = ConstantInt::get(Shl->getType(),
APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue()));

@ -1951,7 +1951,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
}

// When the shift is nuw and pred is >u or <=u, comparison only really happens
// in the pre-shifted bits. Since InstSimplify canoncalizes <=u into <u, the
// in the pre-shifted bits. Since InstSimplify canonicalizes <=u into <u, the
// <=u case can be further converted to match <u (see below).
if (Shl->hasNoUnsignedWrap() &&
(Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT)) {
@ -1970,9 +1970,9 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
// Transform (icmp pred iM (shl iM %v, N), C)
// -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (C>>N))
// Transform the shl to a trunc if (trunc (C>>N)) has no loss and M-N.
// This enables us to get rid of the shift in favor of a trunc which can be
// This enables us to get rid of the shift in favor of a trunc that may be
// free on the target. It has the additional benefit of comparing to a
// smaller constant, which will be target friendly.
// smaller constant that may be more target-friendly.
unsigned Amt = ShiftAmt->getLimitedValue(TypeBits - 1);
if (Shl->hasOneUse() && Amt != 0 && C->countTrailingZeros() >= Amt &&
DL.isLegalInteger(TypeBits - Amt)) {

@ -1818,6 +1818,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
RegisteredFlag = new GlobalVariable(
M, IntptrTy, false, GlobalVariable::CommonLinkage,
ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName);
RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility);

// Update llvm.compiler.used, adding the new liveness globals. This is
// needed so that during LTO these variables stay alive. The alternative

@ -1423,7 +1423,7 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
if (widenLoopCompare(DU))
return nullptr;

// This user does not evaluate to a recurence after widening, so don't
// This user does not evaluate to a recurrence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
truncateIVUse(DU, DT, LI);

@ -415,7 +415,9 @@ class LoadEliminationForLoop {
Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
PH->getTerminator());
Value *Initial =
new LoadInst(InitialPtr, "load_initial", PH->getTerminator());
new LoadInst(InitialPtr, "load_initial", /* isVolatile */ false,
Cand.Load->getAlignment(), PH->getTerminator());

PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
&L->getHeader()->front());
PHI->addIncoming(Initial, PH);

@ -1382,8 +1382,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
Pred->getInstList().splice(BI->getIterator(), Succ->getInstList(),
Succ->begin(), Succ->end());
LPM->deleteSimpleAnalysisValue(BI, L);
BI->eraseFromParent();
RemoveFromWorklist(BI, Worklist);
BI->eraseFromParent();

// Remove Succ from the loop tree.
LI->removeBlock(Succ);

@ -79,7 +79,8 @@ STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted");
STATISTIC(NumGVNBlocksDeleted, "Number of blocks deleted");
STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified");
STATISTIC(NumGVNPhisAllSame, "Number of PHIs whose arguments are all the same");
STATISTIC(NumGVNMaxIterations, "Maximum Number of iterations it took to converge GVN");
STATISTIC(NumGVNMaxIterations,
"Maximum Number of iterations it took to converge GVN");

//===----------------------------------------------------------------------===//
// GVN Pass
@ -327,7 +328,7 @@ class NewGVN : public FunctionPass {
// Elimination.
struct ValueDFS;
void convertDenseToDFSOrdered(CongruenceClass::MemberSet &,
std::vector<ValueDFS> &);
SmallVectorImpl<ValueDFS> &);

bool eliminateInstructions(Function &);
void replaceInstruction(Instruction *, Value *);
@ -336,8 +337,11 @@ class NewGVN : public FunctionPass {

// New instruction creation.
void handleNewInstruction(Instruction *){};

// Various instruction touch utilities
void markUsersTouched(Value *);
void markMemoryUsersTouched(MemoryAccess *);
void markLeaderChangeTouched(CongruenceClass *CC);

// Utilities.
void cleanupTables();
@ -390,10 +394,10 @@ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(NewGVN, "newgvn", "Global Value Numbering", false, false)

PHIExpression *NewGVN::createPHIExpression(Instruction *I) {
BasicBlock *PhiBlock = I->getParent();
BasicBlock *PHIBlock = I->getParent();
auto *PN = cast<PHINode>(I);
auto *E = new (ExpressionAllocator)
PHIExpression(PN->getNumOperands(), I->getParent());
auto *E =
new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock);

E->allocateOperands(ArgRecycler, ExpressionAllocator);
E->setType(I->getType());
@ -408,10 +412,10 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I) {

std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
[&](const Use &U) -> Value * {
// Don't try to transform self-defined phis
// Don't try to transform self-defined phis.
if (U == PN)
return PN;
const BasicBlockEdge BBE(PN->getIncomingBlock(U), PhiBlock);
const BasicBlockEdge BBE(PN->getIncomingBlock(U), PHIBlock);
return lookupOperandLeader(U, I, BBE);
});
return E;

@ -710,6 +714,15 @@ const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI,
return E;
}

// Utility function to check whether the congruence class has a member other
// than the given instruction.
bool hasMemberOtherThanUs(const CongruenceClass *CC, Instruction *I) {
// Either it has more than one member, in which case it must contain something
// other than us (because it's indexed by value), or if it only has one member
// right now, that member should not be us.
return CC->Members.size() > 1 || CC->Members.count(I) == 0;
}

const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I,
const BasicBlock *B) {
// Unlike loads, we never try to eliminate stores, so we do not check if they
@ -725,8 +738,12 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I,
cast<MemoryDef>(StoreAccess)->getDefiningAccess());
const Expression *OldStore = createStoreExpression(SI, StoreRHS, B);
CongruenceClass *CC = ExpressionToClass.lookup(OldStore);
// Basically, check if the congruence class the store is in is defined by a
// store that isn't us, and has the same value. MemorySSA takes care of
// ensuring the store has the same memory state as us already.
if (CC && CC->DefiningExpr && isa<StoreExpression>(CC->DefiningExpr) &&
CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B))
CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B) &&
hasMemberOtherThanUs(CC, I))
return createStoreExpression(SI, StoreRHS, B);
}

@ -810,36 +827,50 @@ bool NewGVN::setMemoryAccessEquivTo(MemoryAccess *From, MemoryAccess *To) {
const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I,
const BasicBlock *B) {
auto *E = cast<PHIExpression>(createPHIExpression(I));
if (E->op_empty()) {
// We match the semantics of SimplifyPhiNode from InstructionSimplify here.

// See if all arguments are the same.
// We track if any were undef because they need special handling.
bool HasUndef = false;
auto Filtered = make_filter_range(E->operands(), [&](const Value *Arg) {
if (Arg == I)
return false;
if (isa<UndefValue>(Arg)) {
HasUndef = true;
return false;
}
return true;
});
// If we are left with no operands, it's undef
if (Filtered.begin() == Filtered.end()) {
DEBUG(dbgs() << "Simplified PHI node " << *I << " to undef"
<< "\n");
E->deallocateOperands(ArgRecycler);
ExpressionAllocator.Deallocate(E);
return createConstantExpression(UndefValue::get(I->getType()));
}

Value *AllSameValue = E->getOperand(0);

// See if all arguments are the same, ignoring undef arguments, because we can
// choose a value that is the same for them.
for (const Value *Arg : E->operands())
if (Arg != AllSameValue && !isa<UndefValue>(Arg)) {
AllSameValue = nullptr;
break;
Value *AllSameValue = *(Filtered.begin());
++Filtered.begin();
// Can't use std::equal here, sadly, because filter.begin moves.
if (llvm::all_of(Filtered, [AllSameValue](const Value *V) {
return V == AllSameValue;
})) {
// In LLVM's non-standard representation of phi nodes, it's possible to have
// phi nodes with cycles (IE dependent on other phis that are .... dependent
// on the original phi node), especially in weird CFG's where some arguments
// are unreachable, or uninitialized along certain paths. This can cause
// infinite loops during evaluation. We work around this by not trying to
// really evaluate them independently, but instead using a variable
// expression to say if one is equivalent to the other.
// We also special case undef, so that if we have an undef, we can't use the
// common value unless it dominates the phi block.
if (HasUndef) {
// Only have to check for instructions
if (auto *AllSameInst = dyn_cast<Instruction>(AllSameValue))
if (!DT->dominates(AllSameInst, I))
return E;
}

if (AllSameValue) {
// It's possible to have phi nodes with cycles (IE dependent on
// other phis that are .... dependent on the original phi node),
// especially in weird CFG's where some arguments are unreachable, or
// uninitialized along certain paths.
// This can cause infinite loops during evaluation (even if you disable
// the recursion below, you will simply ping-pong between congruence
// classes). If a phi node symbolically evaluates to another phi node,
// just leave it alone. If they are really the same, we will still
// eliminate them in favor of each other.
if (isa<PHINode>(AllSameValue))
return E;
NumGVNPhisAllSame++;
DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue
<< "\n");
@ -1007,12 +1038,22 @@ void NewGVN::markMemoryUsersTouched(MemoryAccess *MA) {
}
}

// Touch the instructions that need to be updated after a congruence class has a
// leader change, and mark changed values.
void NewGVN::markLeaderChangeTouched(CongruenceClass *CC) {
for (auto M : CC->Members) {
if (auto *I = dyn_cast<Instruction>(M))
TouchedInstructions.set(InstrDFS[I]);
ChangedValues.insert(M);
}
}

// Perform congruence finding on a given value numbering expression.
void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {

ValueToExpression[V] = E;
// This is guaranteed to return something, since it will at least find
// INITIAL.

CongruenceClass *VClass = ValueToClass[V];
assert(VClass && "Should have found a vclass");
// Dead classes should have been eliminated from the mapping.
@ -1031,14 +1072,17 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
place->second = NewClass;

// Constants and variables should always be made the leader.
if (const auto *CE = dyn_cast<ConstantExpression>(E))
if (const auto *CE = dyn_cast<ConstantExpression>(E)) {
NewClass->RepLeader = CE->getConstantValue();
else if (const auto *VE = dyn_cast<VariableExpression>(E))
NewClass->RepLeader = VE->getVariableValue();
else if (const auto *SE = dyn_cast<StoreExpression>(E))
NewClass->RepLeader = SE->getStoreInst()->getValueOperand();
else
} else if (const auto *SE = dyn_cast<StoreExpression>(E)) {
StoreInst *SI = SE->getStoreInst();
NewClass->RepLeader =
lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent());
} else {
NewClass->RepLeader = V;
}
assert(!isa<VariableExpression>(E) &&
"VariableExpression should have been handled already");

EClass = NewClass;
DEBUG(dbgs() << "Created new congruence class for " << *V
@ -1077,14 +1121,11 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
ExpressionToClass.erase(VClass->DefiningExpr);
}
} else if (VClass->RepLeader == V) {
// FIXME: When the leader changes, the value numbering of
// everything may change, so we need to reprocess.
// When the leader changes, the value numbering of
// everything may change due to symbolization changes, so we need to
// reprocess.
VClass->RepLeader = *(VClass->Members.begin());
for (auto M : VClass->Members) {
if (auto *I = dyn_cast<Instruction>(M))
TouchedInstructions.set(InstrDFS[I]);
ChangedValues.insert(M);
}
markLeaderChangeTouched(VClass);
}
}

@ -1106,6 +1147,27 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
markMemoryUsersTouched(MA);
}
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(V)) {
// There is, sadly, one complicating thing for stores. Stores do not
// produce values, only consume them. However, in order to make loads and
// stores value number the same, we ignore the value operand of the store.
// But the value operand will still be the leader of our class, and thus, it
// may change. Because the store is a use, the store will get reprocessed,
// but nothing will change about it, and so nothing above will catch it
// (since the class will not change). In order to make sure everything ends
// up okay, we need to recheck the leader of the class. Since stores of
// different values value number differently due to different memorydefs, we
// are guaranteed the leader is always the same between stores in the same
// class.
DEBUG(dbgs() << "Checking store leader\n");
auto ProperLeader =
lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent());
if (EClass->RepLeader != ProperLeader) {
DEBUG(dbgs() << "Store leader changed, fixing\n");
EClass->RepLeader = ProperLeader;
markLeaderChangeTouched(EClass);
markMemoryUsersTouched(MSSA->getMemoryAccess(SI));
}
}
}

@ -1708,8 +1770,9 @@ struct NewGVN::ValueDFS {
}
};

void NewGVN::convertDenseToDFSOrdered(CongruenceClass::MemberSet &Dense,
std::vector<ValueDFS> &DFSOrderedSet) {
void NewGVN::convertDenseToDFSOrdered(
CongruenceClass::MemberSet &Dense,
SmallVectorImpl<ValueDFS> &DFSOrderedSet) {
for (auto D : Dense) {
// First add the value.
BasicBlock *BB = getBlockForValue(D);
@ -1972,21 +2035,25 @@ bool NewGVN::eliminateInstructions(Function &F) {
ValueDFSStack EliminationStack;

// Convert the members to DFS ordered sets and then merge them.
std::vector<ValueDFS> DFSOrderedSet;
SmallVector<ValueDFS, 8> DFSOrderedSet;
convertDenseToDFSOrdered(CC->Members, DFSOrderedSet);

// Sort the whole thing.
sort(DFSOrderedSet.begin(), DFSOrderedSet.end());
std::sort(DFSOrderedSet.begin(), DFSOrderedSet.end());

for (auto &C : DFSOrderedSet) {
int MemberDFSIn = C.DFSIn;
int MemberDFSOut = C.DFSOut;
Value *Member = C.Val;
Use *MemberUse = C.U;
for (auto &VD : DFSOrderedSet) {
int MemberDFSIn = VD.DFSIn;
int MemberDFSOut = VD.DFSOut;
Value *Member = VD.Val;
Use *MemberUse = VD.U;

// We ignore void things because we can't get a value from them.
if (Member && Member->getType()->isVoidTy())
continue;
if (Member) {
// We ignore void things because we can't get a value from them.
// FIXME: We could actually use this to kill dead stores that are
// dominated by equivalent earlier stores.
if (Member->getType()->isVoidTy())
continue;
}

if (EliminationStack.empty()) {
DEBUG(dbgs() << "Elimination Stack is empty\n");
@ -1995,8 +2062,6 @@ bool NewGVN::eliminateInstructions(Function &F) {
<< EliminationStack.dfs_back().first << ","
<< EliminationStack.dfs_back().second << ")\n");
}
if (Member && isa<Constant>(Member))
assert(isa<Constant>(CC->RepLeader));

DEBUG(dbgs() << "Current DFS numbers are (" << MemberDFSIn << ","
<< MemberDFSOut << ")\n");
@ -2037,11 +2102,8 @@ bool NewGVN::eliminateInstructions(Function &F) {
continue;
Value *Result = EliminationStack.back();

// Don't replace our existing users with ourselves, and don't replace
// phi node arguments with the result of the same phi node.
// IE tmp = phi(tmp11, undef); tmp11 = foo -> tmp = phi(tmp, undef)
if (MemberUse->get() == Result ||
(isa<PHINode>(Result) && MemberUse->getUser() == Result))
// Don't replace our existing users with ourselves.
if (MemberUse->get() == Result)
continue;

DEBUG(dbgs() << "Found replacement " << *Result << " for "

@ -511,9 +511,6 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
void visitSelectInst(SelectInst &I);
void visitBinaryOperator(Instruction &I);
void visitCmpInst(CmpInst &I);
void visitExtractElementInst(ExtractElementInst &I);
void visitInsertElementInst(InsertElementInst &I);
void visitShuffleVectorInst(ShuffleVectorInst &I);
void visitExtractValueInst(ExtractValueInst &EVI);
void visitInsertValueInst(InsertValueInst &IVI);
void visitLandingPadInst(LandingPadInst &I) { markAnythingOverdefined(&I); }
@ -970,21 +967,6 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
markOverdefined(&I);
}

void SCCPSolver::visitExtractElementInst(ExtractElementInst &I) {
// TODO : SCCP does not handle vectors properly.
return markOverdefined(&I);
}

void SCCPSolver::visitInsertElementInst(InsertElementInst &I) {
// TODO : SCCP does not handle vectors properly.
return markOverdefined(&I);
}

void SCCPSolver::visitShuffleVectorInst(ShuffleVectorInst &I) {
// TODO : SCCP does not handle vectors properly.
return markOverdefined(&I);
}

// Handle getelementptr instructions. If all operands are constants then we
// can turn this into a getelementptr ConstantExpr.
//

@ -67,12 +67,15 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
return true;
}

// When exporting, consult the index.
auto Summaries = ImportIndex.findGlobalValueSummaryList(SGV->getGUID());
assert(Summaries != ImportIndex.end() &&
"Missing summary for global value when exporting");
assert(Summaries->second.size() == 1 && "Local has more than one summary");
auto Linkage = Summaries->second.front()->linkage();
// When exporting, consult the index. We can have more than one local
// with the same GUID, in the case of same-named locals in different but
// same-named source files that were compiled in their respective directories
// (so the source file name and resulting GUID is the same). Find the one
// in this module.
auto Summary = ImportIndex.findSummaryInModule(
SGV->getGUID(), SGV->getParent()->getModuleIdentifier());
assert(Summary && "Missing summary for global value when exporting");
auto Linkage = Summary->linkage();
if (!GlobalValue::isLocalLinkage(Linkage)) {
assert(!isNonRenamableLocal(*SGV) &&
"Attempting to promote non-renamable local");

@ -1189,19 +1189,11 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {

Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
Value *Ret = nullptr;
StringRef Name = Callee->getName();
if (Name == "fabs" && hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, false);
return optimizeUnaryDoubleFP(CI, B, false);

Value *Op = CI->getArgOperand(0);
if (Instruction *I = dyn_cast<Instruction>(Op)) {
// Fold fabs(x * x) -> x * x; any squared FP value must already be positive.
if (I->getOpcode() == Instruction::FMul)
if (I->getOperand(0) == I->getOperand(1))
return Op;
}
return Ret;
return nullptr;
}

Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {

@ -783,6 +783,10 @@ class InnerLoopVectorizer {
// Similarly, we create a new latch condition when setting up the structure
// of the new loop, so the old one can become dead.
SmallPtrSet<Instruction *, 4> DeadInstructions;

// Holds the end values for each induction variable. We save the end values
// so we can later fix-up the external users of the induction variables.
DenseMap<PHINode *, Value *> IVEndValues;
};

class InnerLoopUnroller : public InnerLoopVectorizer {
@ -1879,13 +1883,6 @@ class LoopVectorizationCostModel {
unsigned selectInterleaveCount(bool OptForSize, unsigned VF,
unsigned LoopCost);

/// \return The most profitable unroll factor.
/// This method finds the best unroll-factor based on register pressure and
/// other parameters. VF and LoopCost are the selected vectorization factor
/// and the cost of the selected VF.
unsigned computeInterleaveCount(bool OptForSize, unsigned VF,
unsigned LoopCost);

/// \brief A struct that represents some properties of the register usage
/// of a loop.
struct RegisterUsage {
@ -3424,7 +3421,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
// Create phi nodes to merge from the backedge-taken check block.
PHINode *BCResumeVal = PHINode::Create(
OrigPhi->getType(), 3, "bc.resume.val", ScalarPH->getTerminator());
Value *EndValue;
Value *&EndValue = IVEndValues[OrigPhi];
if (OrigPhi == OldInduction) {
// We know what the end value is.
EndValue = CountRoundDown;
@ -3443,9 +3440,6 @@ void InnerLoopVectorizer::createEmptyLoop() {
// or the value at the end of the vectorized loop.
BCResumeVal->addIncoming(EndValue, MiddleBlock);

// Fix up external users of the induction variable.
fixupIVUsers(OrigPhi, II, CountRoundDown, EndValue, MiddleBlock);

// Fix the scalar body counter (PHI node).
unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);

@ -4116,11 +4110,23 @@ void InnerLoopVectorizer::vectorizeLoop() {
Phi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
} // end of for each Phi in PHIsToFix.

fixLCSSAPHIs();

// Make sure DomTree is updated.
// Update the dominator tree.
//
// FIXME: After creating the structure of the new loop, the dominator tree is
// no longer up-to-date, and it remains that way until we update it
// here. An out-of-date dominator tree is problematic for SCEV,
// because SCEVExpander uses it to guide code generation. The
// vectorizer use SCEVExpanders in several places. Instead, we should
// keep the dominator tree up-to-date as we go.
updateAnalysis();

// Fix-up external users of the induction variables.
for (auto &Entry : *Legal->getInductionVars())
fixupIVUsers(Entry.first, Entry.second,
getOrCreateVectorTripCount(LI->getLoopFor(LoopVectorBody)),
IVEndValues[Entry.first], LoopMiddleBlock);

fixLCSSAPHIs();
predicateInstructions();

// Remove redundant induction instructions.

@ -651,7 +651,8 @@ class Expr : public Stmt {
/// constant.
bool EvaluateWithSubstitution(APValue &Value, ASTContext &Ctx,
const FunctionDecl *Callee,
ArrayRef<const Expr*> Args) const;
ArrayRef<const Expr*> Args,
const Expr *This = nullptr) const;

/// \brief If the current Expr is a pointer, this will try to statically
/// determine the number of bytes available where the pointer is pointing.

@ -140,12 +140,15 @@ class Argument<string name, bit optional, bit fake = 0> {
bit Fake = fake;
}

class BoolArgument<string name, bit opt = 0> : Argument<name, opt>;
class BoolArgument<string name, bit opt = 0, bit fake = 0> : Argument<name, opt,
fake>;
class IdentifierArgument<string name, bit opt = 0> : Argument<name, opt>;
class IntArgument<string name, bit opt = 0> : Argument<name, opt>;
class StringArgument<string name, bit opt = 0> : Argument<name, opt>;
class ExprArgument<string name, bit opt = 0> : Argument<name, opt>;
class FunctionArgument<string name, bit opt = 0> : Argument<name, opt>;
class FunctionArgument<string name, bit opt = 0, bit fake = 0> : Argument<name,
opt,
fake>;
class TypeArgument<string name, bit opt = 0> : Argument<name, opt>;
class UnsignedArgument<string name, bit opt = 0> : Argument<name, opt>;
class VariadicUnsignedArgument<string name> : Argument<name, 1>;
@ -1591,6 +1594,26 @@ def Unavailable : InheritableAttr {
let Documentation = [Undocumented];
}

def DiagnoseIf : InheritableAttr {
let Spellings = [GNU<"diagnose_if">];
let Subjects = SubjectList<[Function]>;
let Args = [ExprArgument<"Cond">, StringArgument<"Message">,
EnumArgument<"DiagnosticType",
"DiagnosticType",
["error", "warning"],
["DT_Error", "DT_Warning"]>,
BoolArgument<"ArgDependent", 0, /*fake*/ 1>,
FunctionArgument<"Parent", 0, /*fake*/ 1>];
let DuplicatesAllowedWhileMerging = 1;
let LateParsed = 1;
let AdditionalMembers = [{
bool isError() const { return diagnosticType == DT_Error; }
bool isWarning() const { return diagnosticType == DT_Warning; }
}];
let TemplateDependent = 1;
let Documentation = [DiagnoseIfDocs];
}

def ArcWeakrefUnavailable : InheritableAttr {
let Spellings = [GNU<"objc_arc_weak_reference_unavailable">];
let Subjects = SubjectList<[ObjCInterface], ErrorDiag>;

@ -378,6 +378,65 @@ template instantiation, so the value for ``T::number`` is known.
}];
}

def DiagnoseIfDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
The ``diagnose_if`` attribute can be placed on function declarations to emit
warnings or errors at compile-time if calls to the attributed function meet
certain user-defined criteria. For example:

.. code-block:: c
void abs(int a)
__attribute__((diagnose_if(a >= 0, "Redundant abs call", "warning")));
void must_abs(int a)
__attribute__((diagnose_if(a >= 0, "Redundant abs call", "error")));

int val = abs(1); // warning: Redundant abs call
int val2 = must_abs(1); // error: Redundant abs call
int val3 = abs(val);
int val4 = must_abs(val); // Because run-time checks are not emitted for
// diagnose_if attributes, this executes without
// issue.


``diagnose_if`` is closely related to ``enable_if``, with a few key differences:

* Overload resolution is not aware of ``diagnose_if`` attributes: they're
considered only after we select the best candidate from a given candidate set.
* Function declarations that differ only in their ``diagnose_if`` attributes are
considered to be redeclarations of the same function (not overloads).
* If the condition provided to ``diagnose_if`` cannot be evaluated, no
diagnostic will be emitted.

Otherwise, ``diagnose_if`` is essentially the logical negation of ``enable_if``.

As a result of bullet number two, ``diagnose_if`` attributes will stack on the
same function. For example:

.. code-block:: c

int foo() __attribute__((diagnose_if(1, "diag1", "warning")));
int foo() __attribute__((diagnose_if(1, "diag2", "warning")));

int bar = foo(); // warning: diag1
// warning: diag2
int (*fooptr)(void) = foo; // warning: diag1
// warning: diag2

constexpr int supportsAPILevel(int N) { return N < 5; }
int baz(int a)
__attribute__((diagnose_if(!supportsAPILevel(10),
"Upgrade to API level 10 to use baz", "error")));
int baz(int a)
__attribute__((diagnose_if(!a, "0 is not recommended.", "warning")));

int (*bazptr)(int) = baz; // error: Upgrade to API level 10 to use baz
int v = baz(0); // error: Upgrade to API level 10 to use baz

Query for this feature with ``__has_attribute(diagnose_if)``.
}];
}

def PassObjectSizeDocs : Documentation {
let Category = DocCatVariable; // Technically it's a parameter doc, but eh.
let Content = [{

@ -161,6 +161,8 @@ def ext_old_implicitly_unsigned_long_cxx : ExtWarn<
InGroup<CXX11Compat>;
def ext_clang_enable_if : Extension<"'enable_if' is a clang extension">,
InGroup<GccCompat>;
def ext_clang_diagnose_if : Extension<"'diagnose_if' is a clang extension">,
InGroup<GccCompat>;

// SEH
def err_seh_expected_handler : Error<

@ -495,6 +495,7 @@ def UnusedPropertyIvar : DiagGroup<"unused-property-ivar">;
def UnusedGetterReturnValue : DiagGroup<"unused-getter-return-value">;
def UsedButMarkedUnused : DiagGroup<"used-but-marked-unused">;
def UserDefinedLiterals : DiagGroup<"user-defined-literals">;
def UserDefinedWarnings : DiagGroup<"user-defined-warnings">;
def Reorder : DiagGroup<"reorder">;
def UndeclaredSelector : DiagGroup<"undeclared-selector">;
def ImplicitAtomic : DiagGroup<"implicit-atomic-properties">;
@ -683,7 +684,8 @@ def Most : DiagGroup<"most", [
OverloadedVirtual,
PrivateExtern,
SelTypeCast,
ExternCCompat
ExternCCompat,
UserDefinedWarnings
]>;

// Thread Safety warnings

@ -2141,8 +2141,11 @@ def err_constexpr_local_var_no_init : Error<
def ext_constexpr_function_never_constant_expr : ExtWarn<
"constexpr %select{function|constructor}0 never produces a "
"constant expression">, InGroup<DiagGroup<"invalid-constexpr">>, DefaultError;
def err_enable_if_never_constant_expr : Error<
"'enable_if' attribute expression never produces a constant expression">;
def err_attr_cond_never_constant_expr : Error<
"%0 attribute expression never produces a constant expression">;
def err_diagnose_if_invalid_diagnostic_type : Error<
"invalid diagnostic type for 'diagnose_if'; use \"error\" or \"warning\" "
"instead">;
def err_constexpr_body_no_return : Error<
"no return statement in constexpr function">;
def err_constexpr_return_missing_expr : Error<
@ -3333,6 +3336,9 @@ def note_ovl_candidate : Note<"candidate "

def note_ovl_candidate_inherited_constructor : Note<
"constructor from base class %0 inherited here">;
def note_ovl_candidate_inherited_constructor_slice : Note<
"constructor inherited from base class cannot be used to initialize from "
"an argument of the derived class type">;
def note_ovl_candidate_illegal_constructor : Note<
"candidate %select{constructor|template}0 ignored: "
"instantiation %select{takes|would take}0 its own class type by value">;
@ -3366,7 +3372,9 @@ def note_ovl_candidate_disabled_by_enable_if : Note<
def note_ovl_candidate_has_pass_object_size_params: Note<
"candidate address cannot be taken because parameter %0 has "
"pass_object_size attribute">;
def note_ovl_candidate_disabled_by_enable_if_attr : Note<
def err_diagnose_if_succeeded : Error<"%0">;
def warn_diagnose_if_succeeded : Warning<"%0">, InGroup<UserDefinedWarnings>;
def note_ovl_candidate_disabled_by_function_cond_attr : Note<
"candidate disabled: %0">;
def note_ovl_candidate_disabled_by_extension : Note<
"candidate disabled due to OpenCL extension">;
@ -4395,6 +4403,7 @@ def note_not_found_by_two_phase_lookup : Note<"%0 should be declared prior to th
def err_undeclared_use : Error<"use of undeclared %0">;
def warn_deprecated : Warning<"%0 is deprecated">,
InGroup<DeprecatedDeclarations>;
def note_from_diagnose_if : Note<"from 'diagnose_if' attribute on %0:">;
def warn_property_method_deprecated :
Warning<"property access is using %0 method which is deprecated">,
InGroup<DeprecatedDeclarations>;

@ -146,6 +146,7 @@ LANGOPT(Modules , 1, 0, "modules extension to C")
COMPATIBLE_LANGOPT(ModulesTS , 1, 0, "C++ Modules TS")
BENIGN_ENUM_LANGOPT(CompilingModule, CompilingModuleKind, 2, CMK_None,
"compiling a module interface")
BENIGN_LANGOPT(CompilingPCH, 1, 0, "building a pch")
COMPATIBLE_LANGOPT(ModulesDeclUse , 1, 0, "require declaration of module uses")
BENIGN_LANGOPT(ModulesSearchAll , 1, 1, "searching even non-imported modules to find unresolved references")
COMPATIBLE_LANGOPT(ModulesStrictDeclUse, 1, 0, "requiring declaration of module uses and all headers to be in modules")

@ -167,6 +167,9 @@ def disable_llvm_passes : Flag<["-"], "disable-llvm-passes">,
"frontend by not running any LLVM passes at all">;
def disable_llvm_optzns : Flag<["-"], "disable-llvm-optzns">,
Alias<disable_llvm_passes>;
def disable_lifetimemarkers : Flag<["-"], "disable-lifetime-markers">,
HelpText<"Disable lifetime-markers emission even when optimizations are "
"enabled">;
def disable_red_zone : Flag<["-"], "disable-red-zone">,
HelpText<"Do not emit code that uses the red zone.">;
def dwarf_column_info : Flag<["-"], "dwarf-column-info">,

@ -52,6 +52,7 @@ CODEGENOPT(DisableGCov , 1, 0) ///< Don't run the GCov pass, for testing.
CODEGENOPT(DisableLLVMPasses , 1, 0) ///< Don't run any LLVM IR passes to get
///< the pristine IR generated by the
///< frontend.
CODEGENOPT(DisableLifetimeMarkers, 1, 0) ///< Don't emit any lifetime markers
CODEGENOPT(ExperimentalNewPassManager, 1, 0) ///< Enables the new, experimental
///< pass manager.
CODEGENOPT(DisableRedZone , 1, 0) ///< Set when -mno-red-zone is enabled.

@ -88,6 +88,8 @@ class GeneratePCHAction : public ASTFrontendAction {
static std::unique_ptr<raw_pwrite_stream>
ComputeASTConsumerArguments(CompilerInstance &CI, StringRef InFile,
std::string &Sysroot, std::string &OutputFile);

bool BeginSourceFileAction(CompilerInstance &CI, StringRef Filename) override;
};

class GenerateModuleAction : public ASTFrontendAction {

@ -59,6 +59,13 @@ enum class SymbolLanguage {
CXX,
};

/// Language specific sub-kinds.
enum class SymbolSubKind {
None,
CXXCopyConstructor,
CXXMoveConstructor,
};

/// Set of properties that provide additional info about a symbol.
enum class SymbolProperty : uint8_t {
Generic = 1 << 0,
@ -107,6 +114,7 @@ struct SymbolRelation {

struct SymbolInfo {
SymbolKind Kind;
SymbolSubKind SubKind;
SymbolPropertySet Properties;
SymbolLanguage Lang;
};
@ -121,6 +129,7 @@ void printSymbolRoles(SymbolRoleSet Roles, raw_ostream &OS);
bool printSymbolName(const Decl *D, const LangOptions &LO, raw_ostream &OS);

StringRef getSymbolKindString(SymbolKind K);
StringRef getSymbolSubKindString(SymbolSubKind K);
StringRef getSymbolLanguageString(SymbolLanguage K);

void applyForEachSymbolProperty(SymbolPropertySet Props,

@ -215,14 +215,14 @@ class InitializedEntity {

/// \brief Create the initialization entity for a parameter.
static InitializedEntity InitializeParameter(ASTContext &Context,
ParmVarDecl *Parm) {
const ParmVarDecl *Parm) {
return InitializeParameter(Context, Parm, Parm->getType());
}

/// \brief Create the initialization entity for a parameter, but use
/// another type.
static InitializedEntity InitializeParameter(ASTContext &Context,
ParmVarDecl *Parm,
const ParmVarDecl *Parm,
QualType Type) {
bool Consumed = (Context.getLangOpts().ObjCAutoRefCount &&
Parm->hasAttr<NSConsumedAttr>());

@ -531,6 +531,13 @@ namespace clang {
Ambiguous.construct();
}

void setAsIdentityConversion(QualType T) {
setStandard();
Standard.setAsIdentityConversion();
Standard.setFromType(T);
Standard.setAllToTypes(T);
}

/// \brief Whether the target is really a std::initializer_list, and the
/// sequence only represents the worst element conversion.
bool isStdInitializerListElement() const {
@ -601,8 +608,17 @@ namespace clang {

/// This candidate was not viable because its OpenCL extension is disabled.
ovl_fail_ext_disabled,

/// This inherited constructor is not viable because it would slice the
/// argument.
ovl_fail_inhctor_slice,
};

/// A list of implicit conversion sequences for the arguments of an
/// OverloadCandidate.
typedef llvm::MutableArrayRef<ImplicitConversionSequence>
ConversionSequenceList;

/// OverloadCandidate - A single candidate in an overload set (C++ 13.3).
struct OverloadCandidate {
/// Function - The actual function that this candidate
@ -627,18 +643,13 @@ namespace clang {
/// is a surrogate, but only if IsSurrogate is true.
CXXConversionDecl *Surrogate;

/// Conversions - The conversion sequences used to convert the
/// function arguments to the function parameters, the pointer points to a
/// fixed size array with NumConversions elements. The memory is owned by
/// the OverloadCandidateSet.
ImplicitConversionSequence *Conversions;
/// The conversion sequences used to convert the function arguments
/// to the function parameters.
ConversionSequenceList Conversions;

/// The FixIt hints which can be used to fix the Bad candidate.
ConversionFixItGenerator Fix;

/// NumConversions - The number of elements in the Conversions array.
unsigned NumConversions;

/// Viable - True to indicate that this overload candidate is viable.
bool Viable;

@ -664,6 +675,26 @@ namespace clang {
/// to be used while performing partial ordering of function templates.
unsigned ExplicitCallArguments;

/// The number of diagnose_if attributes that this overload triggered.
/// If any of the triggered attributes are errors, this won't count
/// diagnose_if warnings.
unsigned NumTriggeredDiagnoseIfs = 0;

/// Basically a TinyPtrVector<DiagnoseIfAttr *> that doesn't own the vector:
/// If NumTriggeredDiagnoseIfs is 0 or 1, this is a DiagnoseIfAttr *,
/// otherwise it's a pointer to an array of `NumTriggeredDiagnoseIfs`
/// DiagnoseIfAttr *s.
llvm::PointerUnion<DiagnoseIfAttr *, DiagnoseIfAttr **> DiagnoseIfInfo;

/// Gets an ArrayRef for the data at DiagnoseIfInfo. Note that this may give
/// you a pointer into DiagnoseIfInfo.
ArrayRef<DiagnoseIfAttr *> getDiagnoseIfInfo() const {
auto *Ptr = NumTriggeredDiagnoseIfs <= 1
? DiagnoseIfInfo.getAddrOfPtr1()
: DiagnoseIfInfo.get<DiagnoseIfAttr **>();
return {Ptr, NumTriggeredDiagnoseIfs};
}

union {
DeductionFailureInfo DeductionFailure;

@ -677,9 +708,9 @@ namespace clang {
/// hasAmbiguousConversion - Returns whether this overload
/// candidate requires an ambiguous conversion or not.
bool hasAmbiguousConversion() const {
for (unsigned i = 0, e = NumConversions; i != e; ++i) {
if (!Conversions[i].isInitialized()) return false;
if (Conversions[i].isAmbiguous()) return true;
for (auto &C : Conversions) {
if (!C.isInitialized()) return false;
if (C.isAmbiguous()) return true;
}
return false;
}
@ -728,17 +759,42 @@ namespace clang {
SmallVector<OverloadCandidate, 16> Candidates;
llvm::SmallPtrSet<Decl *, 16> Functions;

// Allocator for OverloadCandidate::Conversions. We store the first few
// elements inline to avoid allocation for small sets.
llvm::BumpPtrAllocator ConversionSequenceAllocator;
// Allocator for ConversionSequenceLists and DiagnoseIfAttr* arrays.
// We store the first few of each of these inline to avoid allocation for
// small sets.
llvm::BumpPtrAllocator SlabAllocator;

SourceLocation Loc;
CandidateSetKind Kind;

unsigned NumInlineSequences;
llvm::AlignedCharArray<alignof(ImplicitConversionSequence),
16 * sizeof(ImplicitConversionSequence)>
InlineSpace;
constexpr static unsigned NumInlineBytes =
24 * sizeof(ImplicitConversionSequence);
unsigned NumInlineBytesUsed;
llvm::AlignedCharArray<alignof(void *), NumInlineBytes> InlineSpace;

/// If we have space, allocates from inline storage. Otherwise, allocates
/// from the slab allocator.
/// FIXME: It would probably be nice to have a SmallBumpPtrAllocator
/// instead.
template <typename T>
T *slabAllocate(unsigned N) {
// It's simpler if this doesn't need to consider alignment.
static_assert(alignof(T) == alignof(void *),
"Only works for pointer-aligned types.");
static_assert(std::is_trivial<T>::value ||
std::is_same<ImplicitConversionSequence, T>::value,
"Add destruction logic to OverloadCandidateSet::clear().");

unsigned NBytes = sizeof(T) * N;
if (NBytes > NumInlineBytes - NumInlineBytesUsed)
return SlabAllocator.Allocate<T>(N);
char *FreeSpaceStart = InlineSpace.buffer + NumInlineBytesUsed;
assert(uintptr_t(FreeSpaceStart) % alignof(void *) == 0 &&
"Misaligned storage!");

NumInlineBytesUsed += NBytes;
return reinterpret_cast<T *>(FreeSpaceStart);
}

OverloadCandidateSet(const OverloadCandidateSet &) = delete;
void operator=(const OverloadCandidateSet &) = delete;
@ -747,12 +803,17 @@ namespace clang {

public:
OverloadCandidateSet(SourceLocation Loc, CandidateSetKind CSK)
: Loc(Loc), Kind(CSK), NumInlineSequences(0) {}
: Loc(Loc), Kind(CSK), NumInlineBytesUsed(0) {}
~OverloadCandidateSet() { destroyCandidates(); }

SourceLocation getLocation() const { return Loc; }
CandidateSetKind getKind() const { return Kind; }

/// Make a DiagnoseIfAttr* array in a block of memory that will live for
/// as long as this OverloadCandidateSet. Returns a pointer to the start
/// of that array.
DiagnoseIfAttr **addDiagnoseIfComplaints(ArrayRef<DiagnoseIfAttr *> CA);

/// \brief Determine when this overload candidate will be new to the
/// overload set.
bool isNewCandidate(Decl *F) {
@ -769,30 +830,32 @@ namespace clang {
size_t size() const { return Candidates.size(); }
bool empty() const { return Candidates.empty(); }

/// \brief Add a new candidate with NumConversions conversion sequence slots
/// to the overload set.
OverloadCandidate &addCandidate(unsigned NumConversions = 0) {
Candidates.push_back(OverloadCandidate());
OverloadCandidate &C = Candidates.back();

// Assign space from the inline array if there are enough free slots
// available.
if (NumConversions + NumInlineSequences <= 16) {
ImplicitConversionSequence *I =
(ImplicitConversionSequence *)InlineSpace.buffer;
C.Conversions = &I[NumInlineSequences];
NumInlineSequences += NumConversions;
} else {
// Otherwise get memory from the allocator.
C.Conversions = ConversionSequenceAllocator
.Allocate<ImplicitConversionSequence>(NumConversions);
}
/// \brief Allocate storage for conversion sequences for NumConversions
/// conversions.
ConversionSequenceList
allocateConversionSequences(unsigned NumConversions) {
ImplicitConversionSequence *Conversions =
slabAllocate<ImplicitConversionSequence>(NumConversions);

// Construct the new objects.
for (unsigned i = 0; i != NumConversions; ++i)
new (&C.Conversions[i]) ImplicitConversionSequence();
for (unsigned I = 0; I != NumConversions; ++I)
new (&Conversions[I]) ImplicitConversionSequence();

C.NumConversions = NumConversions;
return ConversionSequenceList(Conversions, NumConversions);
}

/// \brief Add a new candidate with NumConversions conversion sequence slots
/// to the overload set.
OverloadCandidate &addCandidate(unsigned NumConversions = 0,
ConversionSequenceList Conversions = None) {
assert((Conversions.empty() || Conversions.size() == NumConversions) &&
"preallocated conversion sequence has wrong length");

Candidates.push_back(OverloadCandidate());
OverloadCandidate &C = Candidates.back();
C.Conversions = Conversions.empty()
? allocateConversionSequences(NumConversions)
: Conversions;
return C;
}

@ -27,6 +27,7 @@
|
||||
#include "clang/AST/NSAPI.h"
|
||||
#include "clang/AST/PrettyPrinter.h"
|
||||
#include "clang/AST/TypeLoc.h"
|
||||
#include "clang/AST/TypeOrdering.h"
|
||||
#include "clang/Basic/ExpressionTraits.h"
|
||||
#include "clang/Basic/LangOptions.h"
|
||||
#include "clang/Basic/Module.h"
|
||||
@ -119,6 +120,7 @@ namespace clang {
|
||||
class FunctionProtoType;
|
||||
class FunctionTemplateDecl;
|
||||
class ImplicitConversionSequence;
|
||||
typedef MutableArrayRef<ImplicitConversionSequence> ConversionSequenceList;
|
||||
class InitListExpr;
|
||||
class InitializationKind;
|
||||
class InitializationSequence;
|
||||
@ -806,6 +808,12 @@ class Sema {
|
||||
/// run time.
|
||||
Unevaluated,
|
||||
|
||||
/// \brief The current expression occurs within a braced-init-list within
|
||||
/// an unevaluated operand. This is mostly like a regular unevaluated
|
||||
/// context, except that we still instantiate constexpr functions that are
|
||||
/// referenced here so that we can perform narrowing checks correctly.
|
||||
UnevaluatedList,
|
||||
|
||||
/// \brief The current expression occurs within a discarded statement.
|
||||
/// This behaves largely similarly to an unevaluated operand in preventing
|
||||
/// definitions from being required, but not in other ways.
|
||||
@ -898,7 +906,8 @@ class Sema {
|
||||
MangleNumberingContext &getMangleNumberingContext(ASTContext &Ctx);
|
||||
|
||||
bool isUnevaluated() const {
|
||||
return Context == Unevaluated || Context == UnevaluatedAbstract;
|
||||
return Context == Unevaluated || Context == UnevaluatedAbstract ||
|
||||
Context == UnevaluatedList;
|
||||
}
|
||||
};
|
||||
|
||||
@ -2510,10 +2519,11 @@ class Sema {
|
||||
void AddOverloadCandidate(FunctionDecl *Function,
|
||||
DeclAccessPair FoundDecl,
|
||||
ArrayRef<Expr *> Args,
|
||||
OverloadCandidateSet& CandidateSet,
|
||||
OverloadCandidateSet &CandidateSet,
|
||||
bool SuppressUserConversions = false,
|
||||
bool PartialOverloading = false,
|
||||
bool AllowExplicit = false);
|
||||
bool AllowExplicit = false,
|
||||
ConversionSequenceList EarlyConversions = None);
|
||||
void AddFunctionCandidates(const UnresolvedSetImpl &Functions,
|
||||
ArrayRef<Expr *> Args,
|
||||
OverloadCandidateSet &CandidateSet,
|
||||
@ -2523,23 +2533,25 @@ class Sema {
|
||||
void AddMethodCandidate(DeclAccessPair FoundDecl,
|
||||
QualType ObjectType,
|
||||
Expr::Classification ObjectClassification,
|
||||
ArrayRef<Expr *> Args,
|
||||
Expr *ThisArg, ArrayRef<Expr *> Args,
|
||||
OverloadCandidateSet& CandidateSet,
|
||||
bool SuppressUserConversion = false);
|
||||
void AddMethodCandidate(CXXMethodDecl *Method,
|
||||
DeclAccessPair FoundDecl,
|
||||
CXXRecordDecl *ActingContext, QualType ObjectType,
|
||||
Expr::Classification ObjectClassification,
|
||||
ArrayRef<Expr *> Args,
|
||||
Expr *ThisArg, ArrayRef<Expr *> Args,
|
||||
OverloadCandidateSet& CandidateSet,
|
||||
bool SuppressUserConversions = false,
|
||||
bool PartialOverloading = false);
|
||||
bool PartialOverloading = false,
|
||||
ConversionSequenceList EarlyConversions = None);
|
||||
void AddMethodTemplateCandidate(FunctionTemplateDecl *MethodTmpl,
|
||||
DeclAccessPair FoundDecl,
|
||||
CXXRecordDecl *ActingContext,
|
||||
TemplateArgumentListInfo *ExplicitTemplateArgs,
|
||||
QualType ObjectType,
|
||||
Expr::Classification ObjectClassification,
|
||||
Expr *ThisArg,
|
||||
ArrayRef<Expr *> Args,
|
||||
OverloadCandidateSet& CandidateSet,
|
||||
bool SuppressUserConversions = false,
|
||||
@ -2551,6 +2563,16 @@ class Sema {
|
||||
OverloadCandidateSet& CandidateSet,
|
||||
bool SuppressUserConversions = false,
|
||||
bool PartialOverloading = false);
|
||||
bool CheckNonDependentConversions(FunctionTemplateDecl *FunctionTemplate,
|
||||
ArrayRef<QualType> ParamTypes,
|
||||
ArrayRef<Expr *> Args,
|
||||
OverloadCandidateSet &CandidateSet,
|
||||
ConversionSequenceList &Conversions,
|
||||
bool SuppressUserConversions,
|
||||
CXXRecordDecl *ActingContext = nullptr,
|
||||
QualType ObjectType = QualType(),
|
||||
Expr::Classification
|
||||
ObjectClassification = {});
|
||||
void AddConversionCandidate(CXXConversionDecl *Conversion,
|
||||
DeclAccessPair FoundDecl,
|
||||
CXXRecordDecl *ActingContext,
|
||||
@ -2603,6 +2625,38 @@ class Sema {
EnableIfAttr *CheckEnableIf(FunctionDecl *Function, ArrayRef<Expr *> Args,
bool MissingImplicitThis = false);

/// Check the diagnose_if attributes on the given function. Returns the
/// first successful fatal attribute, or null if calling Function(Args) isn't
/// an error.
///
/// This only considers ArgDependent DiagnoseIfAttrs.
///
/// This will populate Nonfatal with all non-error DiagnoseIfAttrs that
/// succeed. If this function returns non-null, the contents of Nonfatal are
/// unspecified.
DiagnoseIfAttr *
checkArgDependentDiagnoseIf(FunctionDecl *Function, ArrayRef<Expr *> Args,
SmallVectorImpl<DiagnoseIfAttr *> &Nonfatal,
bool MissingImplicitThis = false,
Expr *ThisArg = nullptr);

/// Check the diagnose_if expressions on the given function. Returns the
/// first successful fatal attribute, or null if using Function isn't
/// an error.
///
/// This ignores all ArgDependent DiagnoseIfAttrs.
///
/// This will populate Nonfatal with all non-error DiagnoseIfAttrs that
/// succeed. If this function returns non-null, the contents of Nonfatal are
/// unspecified.
DiagnoseIfAttr *
checkArgIndependentDiagnoseIf(FunctionDecl *Function,
SmallVectorImpl<DiagnoseIfAttr *> &Nonfatal);

/// Emits the diagnostic contained in the given DiagnoseIfAttr at Loc. Also
/// emits a note about the location of said attribute.
void emitDiagnoseIfDiagnostic(SourceLocation Loc, const DiagnoseIfAttr *DIA);

/// Returns whether the given function's address can be taken or not,
/// optionally emitting a diagnostic if the address can't be taken.
///
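A rough user-level sketch of what these helpers evaluate (illustrative only, not part of the diff; names are hypothetical, attribute spelling per clang's diagnose_if support of this period):

// An ArgDependent diagnose_if condition refers to the call arguments;
// checkArgDependentDiagnoseIf evaluates it once the argument values are known.
int safe_div(int a, int b)
    __attribute__((diagnose_if(b == 0, "division by zero", "error")))
    __attribute__((diagnose_if(b == 1, "dividing by one is a no-op", "warning")));

int use() {
  return safe_div(10, 0); // the "error" attribute fires and the call is rejected
}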
@ -3801,6 +3855,9 @@ class Sema {
/// variable will have in the given scope.
QualType getCapturedDeclRefType(VarDecl *Var, SourceLocation Loc);

/// Mark all of the declarations referenced within a particular AST node as
/// referenced. Used when template instantiation instantiates a non-dependent
/// type -- entities referenced by the type are now referenced.
void MarkDeclarationsReferencedInType(SourceLocation Loc, QualType T);
void MarkDeclarationsReferencedInExpr(Expr *E,
bool SkipLocalVariables = false);
@ -6580,6 +6637,8 @@ class Sema {
/// \brief The explicitly-specified template arguments were not valid
/// template arguments for the given template.
TDK_InvalidExplicitArguments,
/// \brief Checking non-dependent argument conversions failed.
TDK_NonDependentConversionFailure,
/// \brief Deduction failed; that's all we know.
TDK_MiscellaneousDeductionFailure,
/// \brief CUDA Target attributes do not match.
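A minimal sketch (illustrative, not from the diff; types are hypothetical) of a call this new deduction result describes: the template parameter deduces fine, but a non-dependent parameter cannot be initialized from its argument.

struct NoConv {};                       // no conversion from int
template <typename T> void f(T, NoConv);

void g() {
  f(42, 1); // T deduces to int, but 1 -> NoConv fails: a non-dependent conversion failure
}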
@ -6618,22 +6677,21 @@ class Sema {
QualType OriginalArgType;
};

TemplateDeductionResult
FinishTemplateArgumentDeduction(FunctionTemplateDecl *FunctionTemplate,
SmallVectorImpl<DeducedTemplateArgument> &Deduced,
unsigned NumExplicitlySpecified,
FunctionDecl *&Specialization,
sema::TemplateDeductionInfo &Info,
SmallVectorImpl<OriginalCallArg> const *OriginalCallArgs = nullptr,
bool PartialOverloading = false);
TemplateDeductionResult FinishTemplateArgumentDeduction(
FunctionTemplateDecl *FunctionTemplate,
SmallVectorImpl<DeducedTemplateArgument> &Deduced,
unsigned NumExplicitlySpecified, FunctionDecl *&Specialization,
sema::TemplateDeductionInfo &Info,
SmallVectorImpl<OriginalCallArg> const *OriginalCallArgs = nullptr,
bool PartialOverloading = false,
llvm::function_ref<bool()> CheckNonDependent = []{ return false; });

TemplateDeductionResult
DeduceTemplateArguments(FunctionTemplateDecl *FunctionTemplate,
TemplateArgumentListInfo *ExplicitTemplateArgs,
ArrayRef<Expr *> Args,
FunctionDecl *&Specialization,
sema::TemplateDeductionInfo &Info,
bool PartialOverloading = false);
TemplateDeductionResult DeduceTemplateArguments(
FunctionTemplateDecl *FunctionTemplate,
TemplateArgumentListInfo *ExplicitTemplateArgs, ArrayRef<Expr *> Args,
FunctionDecl *&Specialization, sema::TemplateDeductionInfo &Info,
bool PartialOverloading,
llvm::function_ref<bool(ArrayRef<QualType>)> CheckNonDependent);

TemplateDeductionResult
DeduceTemplateArguments(FunctionTemplateDecl *FunctionTemplate,
@ -6877,6 +6935,10 @@ class Sema {
/// Specializations whose definitions are currently being instantiated.
llvm::DenseSet<std::pair<Decl *, unsigned>> InstantiatingSpecializations;

/// Non-dependent types used in templates that have already been instantiated
/// by some template instantiation.
llvm::DenseSet<QualType> InstantiatedNonDependentTypes;

/// \brief Extra modules inspected when performing a lookup during a template
/// instantiation. Computed lazily.
SmallVector<Module*, 16> ActiveTemplateInstantiationLookupModules;
@ -10186,6 +10248,22 @@ class EnterExpressionEvaluationContext {
IsDecltype);
}

enum InitListTag { InitList };
EnterExpressionEvaluationContext(Sema &Actions, InitListTag,
bool ShouldEnter = true)
: Actions(Actions), Entered(false) {
// In C++11 onwards, narrowing checks are performed on the contents of
// braced-init-lists, even when they occur within unevaluated operands.
// Therefore we still need to instantiate constexpr functions used in such
// a context.
if (ShouldEnter && Actions.isUnevaluatedContext() &&
Actions.getLangOpts().CPlusPlus11) {
Actions.PushExpressionEvaluationContext(Sema::UnevaluatedList, nullptr,
false);
Entered = true;
}
}

~EnterExpressionEvaluationContext() {
if (Entered)
Actions.PopExpressionEvaluationContext();
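A minimal sketch (illustrative, not from the diff; the helper is hypothetical) of the language behavior this UnevaluatedList context models: narrowing checks still apply to braced-init-lists inside unevaluated operands, so constexpr functions referenced there must be instantiated.

template <typename T> constexpr T zero() { return T(); }

// OK: zero<int>() must be instantiated to see that it is a constant expression
// whose value (0) fits in char, so the braced init does not narrow.
using A = decltype(char{zero<int>()});

// Ill-formed even though the operand of decltype is unevaluated:
// using B = decltype(int{3.5});   // narrowing from double to int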
@ -278,6 +278,14 @@ def VirtualCallChecker : Checker<"VirtualCall">,

} // end: "optin.cplusplus"

let ParentPackage = CplusplusAlpha in {

def IteratorPastEndChecker : Checker<"IteratorPastEnd">,
HelpText<"Check iterators used past end">,
DescFile<"IteratorPastEndChecker.cpp">;

} // end: "alpha.cplusplus"

//===----------------------------------------------------------------------===//
// Valist checkers.

@ -4543,6 +4543,12 @@ class ExprEvaluatorBase
Call.getLValueBase().dyn_cast<const ValueDecl*>());
if (!FD)
return Error(Callee);
// Don't call function pointers which have been cast to some other type.
// Per DR (no number yet), the caller and callee can differ in noexcept.
if (!Info.Ctx.hasSameFunctionTypeIgnoringExceptionSpec(
CalleeType->getPointeeType(), FD->getType())) {
return Error(E);
}

// Overloaded operator calls to member functions are represented as normal
// calls with '*this' as the first argument.
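A minimal sketch (illustrative, not from the diff) of a call this check still permits: the function pointer and the callee may differ only in their exception specification.

constexpr int one() noexcept { return 1; }
constexpr int (*p)() = one;       // noexcept dropped from the pointer's type
static_assert(p() == 1, "");      // still evaluable: types match ignoring noexcept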
@ -4558,14 +4564,42 @@ class ExprEvaluatorBase
return false;
This = &ThisVal;
Args = Args.slice(1);
} else if (MD && MD->isLambdaStaticInvoker()) {
// Map the static invoker for the lambda back to the call operator.
// Conveniently, we don't have to slice out the 'this' argument (as is
// being done for the non-static case), since a static member function
// doesn't have an implicit argument passed in.
const CXXRecordDecl *ClosureClass = MD->getParent();
assert(
ClosureClass->captures_begin() == ClosureClass->captures_end() &&
"Number of captures must be zero for conversion to function-ptr");

const CXXMethodDecl *LambdaCallOp =
ClosureClass->getLambdaCallOperator();

// Set 'FD', the function that will be called below, to the call
// operator. If the closure object represents a generic lambda, find
// the corresponding specialization of the call operator.

if (ClosureClass->isGenericLambda()) {
assert(MD->isFunctionTemplateSpecialization() &&
"A generic lambda's static-invoker function must be a "
"template specialization");
const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs();
FunctionTemplateDecl *CallOpTemplate =
LambdaCallOp->getDescribedFunctionTemplate();
void *InsertPos = nullptr;
FunctionDecl *CorrespondingCallOpSpecialization =
CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos);
assert(CorrespondingCallOpSpecialization &&
"We must always have a function call operator specialization "
"that corresponds to our static invoker specialization");
FD = cast<CXXMethodDecl>(CorrespondingCallOpSpecialization);
} else
FD = LambdaCallOp;
}

// Don't call function pointers which have been cast to some other type.
// Per DR (no number yet), the caller and callee can differ in noexcept.
if (!Info.Ctx.hasSameFunctionTypeIgnoringExceptionSpec(
CalleeType->getPointeeType(), FD->getType())) {
return Error(E);
}

} else
return Error(E);
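A minimal sketch (illustrative, not from the diff) of the constant-expression calls this mapping enables, assuming the in-progress C++1z constexpr lambda support:

constexpr auto add = [](int a, int b) { return a + b; };
constexpr int (*padd)(int, int) = add;   // captureless lambda -> function pointer
static_assert(padd(2, 3) == 5, "");      // evaluated via the lambda's call operator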
@ -5834,6 +5868,7 @@ namespace {
bool VisitCXXConstructExpr(const CXXConstructExpr *E) {
return VisitCXXConstructExpr(E, E->getType());
}
bool VisitLambdaExpr(const LambdaExpr *E);
bool VisitCXXInheritedCtorInitExpr(const CXXInheritedCtorInitExpr *E);
bool VisitCXXConstructExpr(const CXXConstructExpr *E, QualType T);
bool VisitCXXStdInitializerListExpr(const CXXStdInitializerListExpr *E);
@ -6168,6 +6203,21 @@ bool RecordExprEvaluator::VisitCXXStdInitializerListExpr(
return true;
}

bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) {
const CXXRecordDecl *ClosureClass = E->getLambdaClass();
if (ClosureClass->isInvalidDecl()) return false;

if (Info.checkingPotentialConstantExpression()) return true;
if (E->capture_size()) {
Info.FFDiag(E, diag::note_unimplemented_constexpr_lambda_feature_ast)
<< "can not evaluate lambda expressions with captures";
return false;
}
// FIXME: Implement captures.
Result = APValue(APValue::UninitStruct(), /*NumBases*/0, /*NumFields*/0);
return true;
}

static bool EvaluateRecord(const Expr *E, const LValue &This,
APValue &Result, EvalInfo &Info) {
assert(E->isRValue() && E->getType()->isRecordType() &&
@ -6217,6 +6267,9 @@ class TemporaryExprEvaluator
bool VisitCXXStdInitializerListExpr(const CXXStdInitializerListExpr *E) {
return VisitConstructExpr(E);
}
bool VisitLambdaExpr(const LambdaExpr *E) {
return VisitConstructExpr(E);
}
};
} // end anonymous namespace
@ -10357,10 +10410,25 @@ bool Expr::isCXX11ConstantExpr(const ASTContext &Ctx, APValue *Result,

bool Expr::EvaluateWithSubstitution(APValue &Value, ASTContext &Ctx,
const FunctionDecl *Callee,
ArrayRef<const Expr*> Args) const {
ArrayRef<const Expr*> Args,
const Expr *This) const {
Expr::EvalStatus Status;
EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantExpressionUnevaluated);

LValue ThisVal;
const LValue *ThisPtr = nullptr;
if (This) {
#ifndef NDEBUG
auto *MD = dyn_cast<CXXMethodDecl>(Callee);
assert(MD && "Don't provide `this` for non-methods.");
assert(!MD->isStatic() && "Don't provide `this` for static methods.");
#endif
if (EvaluateObjectArgument(Info, This, ThisVal))
ThisPtr = &ThisVal;
if (Info.EvalStatus.HasSideEffects)
return false;
}

ArgVector ArgValues(Args.size());
for (ArrayRef<const Expr*>::iterator I = Args.begin(), E = Args.end();
I != E; ++I) {
@ -10373,7 +10441,7 @@ bool Expr::EvaluateWithSubstitution(APValue &Value, ASTContext &Ctx,
}

// Build fake call to Callee.
CallStackFrame Frame(Info, Callee->getLocation(), Callee, /*This*/nullptr,
CallStackFrame Frame(Info, Callee->getLocation(), Callee, ThisPtr,
ArgValues.data());
return Evaluate(Value, Info, this) && !Info.EvalStatus.HasSideEffects;
}
@ -109,13 +109,13 @@ static const DeclContext *getEffectiveParentContext(const DeclContext *DC) {

static const FunctionDecl *getStructor(const NamedDecl *ND) {
if (const auto *FTD = dyn_cast<FunctionTemplateDecl>(ND))
return FTD->getTemplatedDecl();
return FTD->getTemplatedDecl()->getCanonicalDecl();

const auto *FD = cast<FunctionDecl>(ND);
if (const auto *FTD = FD->getPrimaryTemplate())
return FTD->getTemplatedDecl();
return FTD->getTemplatedDecl()->getCanonicalDecl();

return FD;
return FD->getCanonicalDecl();
}

/// MicrosoftMangleContextImpl - Overrides the default MangleContext for the
@ -312,6 +312,10 @@ class MicrosoftCXXNameMangler {
void mangleNestedName(const NamedDecl *ND);

private:
bool isStructorDecl(const NamedDecl *ND) const {
return ND == Structor || getStructor(ND) == Structor;
}

void mangleUnqualifiedName(const NamedDecl *ND) {
mangleUnqualifiedName(ND, ND->getDeclName());
}
@ -898,7 +902,7 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
llvm_unreachable("Can't mangle Objective-C selector names here!");

case DeclarationName::CXXConstructorName:
if (Structor == getStructor(ND)) {
if (isStructorDecl(ND)) {
if (StructorType == Ctor_CopyingClosure) {
Out << "?_O";
return;
@ -912,7 +916,7 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
return;

case DeclarationName::CXXDestructorName:
if (ND == Structor)
if (isStructorDecl(ND))
// If the named decl is the C++ destructor we're mangling,
// use the type we were given.
mangleCXXDtorType(static_cast<CXXDtorType>(StructorType));
@ -1862,7 +1866,7 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T,
IsStructor = true;
IsCtorClosure = (StructorType == Ctor_CopyingClosure ||
StructorType == Ctor_DefaultClosure) &&
getStructor(MD) == Structor;
isStructorDecl(MD);
if (IsCtorClosure)
CC = getASTContext().getDefaultCallingConvention(
/*IsVariadic=*/false, /*IsCXXMethod=*/true);
@ -1883,7 +1887,7 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T,
// <return-type> ::= <type>
// ::= @ # structors (they have no declared return type)
if (IsStructor) {
if (isa<CXXDestructorDecl>(D) && D == Structor &&
if (isa<CXXDestructorDecl>(D) && isStructorDecl(D) &&
StructorType == Dtor_Deleting) {
// The scalar deleting destructor takes an extra int argument.
// However, the FunctionType generated has 0 arguments.
@ -312,7 +312,8 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
// At O0 and O1 we only run the always inliner which is more efficient. At
// higher optimization levels we run the normal inliner.
if (CodeGenOpts.OptimizationLevel <= 1) {
bool InsertLifetimeIntrinsics = CodeGenOpts.OptimizationLevel != 0;
bool InsertLifetimeIntrinsics = (CodeGenOpts.OptimizationLevel != 0 &&
!CodeGenOpts.DisableLifetimeMarkers);
PMBuilder.Inliner = createAlwaysInlinerLegacyPass(InsertLifetimeIntrinsics);
} else {
PMBuilder.Inliner = createFunctionInliningPass(
@ -519,11 +520,22 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
.Case("dynamic-no-pic", llvm::Reloc::DynamicNoPIC);
assert(RM.hasValue() && "invalid PIC model!");

CodeGenOpt::Level OptLevel = CodeGenOpt::Default;
CodeGenOpt::Level OptLevel;
switch (CodeGenOpts.OptimizationLevel) {
default: break;
case 0: OptLevel = CodeGenOpt::None; break;
case 3: OptLevel = CodeGenOpt::Aggressive; break;
default:
llvm_unreachable("Invalid optimization level!");
case 0:
OptLevel = CodeGenOpt::None;
break;
case 1:
OptLevel = CodeGenOpt::Less;
break;
case 2:
OptLevel = CodeGenOpt::Default;
break; // O2/Os/Oz
case 3:
OptLevel = CodeGenOpt::Aggressive;
break;
}

llvm::TargetOptions Options;
@ -849,21 +861,8 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
}
}

static void runThinLTOBackend(const CodeGenOptions &CGOpts, Module *M,
static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
std::unique_ptr<raw_pwrite_stream> OS) {
// If we are performing a ThinLTO importing compile, load the function index
// into memory and pass it into thinBackend, which will run the function
// importer and invoke LTO passes.
Expected<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr =
llvm::getModuleSummaryIndexForFile(CGOpts.ThinLTOIndexFile);
if (!IndexOrErr) {
logAllUnhandledErrors(IndexOrErr.takeError(), errs(),
"Error loading index file '" +
CGOpts.ThinLTOIndexFile + "': ");
return;
}
std::unique_ptr<ModuleSummaryIndex> CombinedIndex = std::move(*IndexOrErr);

StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>>
ModuleToDefinedGVSummaries;
CombinedIndex->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
@ -949,8 +948,26 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
BackendAction Action,
std::unique_ptr<raw_pwrite_stream> OS) {
if (!CGOpts.ThinLTOIndexFile.empty()) {
runThinLTOBackend(CGOpts, M, std::move(OS));
return;
// If we are performing a ThinLTO importing compile, load the function index
// into memory and pass it into runThinLTOBackend, which will run the
// function importer and invoke LTO passes.
Expected<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr =
llvm::getModuleSummaryIndexForFile(CGOpts.ThinLTOIndexFile);
if (!IndexOrErr) {
logAllUnhandledErrors(IndexOrErr.takeError(), errs(),
"Error loading index file '" +
CGOpts.ThinLTOIndexFile + "': ");
return;
}
std::unique_ptr<ModuleSummaryIndex> CombinedIndex = std::move(*IndexOrErr);
// A null CombinedIndex means we should skip ThinLTO compilation
// (LLVM will optionally ignore empty index files, returning null instead
// of an error).
bool DoThinLTOBackend = CombinedIndex != nullptr;
if (DoThinLTOBackend) {
runThinLTOBackend(CombinedIndex.get(), M, std::move(OS));
return;
}
}

EmitAssemblyHelper AsmHelper(Diags, HeaderOpts, CGOpts, TOpts, LOpts, M);
@ -616,6 +616,8 @@ struct EHPersonality {
static const EHPersonality GNU_C_SJLJ;
static const EHPersonality GNU_C_SEH;
static const EHPersonality GNU_ObjC;
static const EHPersonality GNU_ObjC_SJLJ;
static const EHPersonality GNU_ObjC_SEH;
static const EHPersonality GNUstep_ObjC;
static const EHPersonality GNU_ObjCXX;
static const EHPersonality NeXT_ObjC;

@ -97,6 +97,10 @@ EHPersonality::GNU_CPlusPlus_SEH = { "__gxx_personality_seh0", nullptr };
const EHPersonality
EHPersonality::GNU_ObjC = {"__gnu_objc_personality_v0", "objc_exception_throw"};
const EHPersonality
EHPersonality::GNU_ObjC_SJLJ = {"__gnu_objc_personality_sj0", "objc_exception_throw"};
const EHPersonality
EHPersonality::GNU_ObjC_SEH = {"__gnu_objc_personality_seh0", "objc_exception_throw"};
const EHPersonality
EHPersonality::GNU_ObjCXX = { "__gnustep_objcxx_personality_v0", nullptr };
const EHPersonality
EHPersonality::GNUstep_ObjC = { "__gnustep_objc_personality_v0", nullptr };
@ -137,6 +141,10 @@ static const EHPersonality &getObjCPersonality(const llvm::Triple &T,
// fallthrough
case ObjCRuntime::GCC:
case ObjCRuntime::ObjFW:
if (L.SjLjExceptions)
return EHPersonality::GNU_ObjC_SJLJ;
else if (useLibGCCSEHPersonality(T))
return EHPersonality::GNU_ObjC_SEH;
return EHPersonality::GNU_ObjC;
}
llvm_unreachable("bad runtime kind");

@ -42,6 +42,9 @@ using namespace CodeGen;
/// markers.
static bool shouldEmitLifetimeMarkers(const CodeGenOptions &CGOpts,
const LangOptions &LangOpts) {
if (CGOpts.DisableLifetimeMarkers)
return false;

// Asan uses markers for use-after-scope checks.
if (CGOpts.SanitizeAddressUseAfterScope)
return true;

@ -3812,6 +3812,7 @@ ToolChain::CXXStdlibType NetBSD::GetDefaultCXXStdlibType() const {
if (Major >= 7 || Major == 0) {
switch (getArch()) {
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:

@ -9644,6 +9644,7 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
if (Major >= 7 || Major == 0) {
switch (getToolChain().getArch()) {
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
@ -1282,9 +1282,7 @@ class AnnotatingParser {
return TT_UnaryOperator;

const FormatToken *NextToken = Tok.getNextNonComment();
if (!NextToken ||
NextToken->isOneOf(tok::arrow, Keywords.kw_final, tok::equal,
Keywords.kw_override) ||
if (!NextToken || NextToken->isOneOf(tok::arrow, tok::equal) ||
(NextToken->is(tok::l_brace) && !NextToken->getNextNonComment()))
return TT_PointerOrReference;

@ -2088,9 +2086,9 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
!Line.IsMultiVariableDeclStmt)))
return true;
if (Left.is(TT_PointerOrReference))
return Right.Tok.isLiteral() ||
Right.isOneOf(TT_BlockComment, Keywords.kw_final,
Keywords.kw_override) ||
return Right.Tok.isLiteral() || Right.is(TT_BlockComment) ||
(Right.isOneOf(Keywords.kw_override, Keywords.kw_final) &&
!Right.is(TT_StartOfName)) ||
(Right.is(tok::l_brace) && Right.BlockKind == BK_Block) ||
(!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
tok::l_paren) &&

@ -737,7 +737,7 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() {
return;
}
if (Next->is(tok::exclaim) && PreviousMustBeValue)
addUnwrappedLine();
return addUnwrappedLine();
bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
bool NextEndsTemplateExpr =
Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
@ -745,9 +745,10 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() {
(PreviousMustBeValue ||
Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
tok::minusminus)))
addUnwrappedLine();
if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
addUnwrappedLine();
return addUnwrappedLine();
if ((PreviousMustBeValue || Previous->is(tok::r_brace)) &&
isJSDeclOrStmt(Keywords, Next))
return addUnwrappedLine();
}

void UnwrappedLineParser::parseStructuralElement() {
@ -1974,7 +1975,14 @@ void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
!FormatTok->isStringLiteral())
return;

while (!eof() && FormatTok->isNot(tok::semi)) {
while (!eof()) {
if (FormatTok->is(tok::semi))
return;
if (Line->Tokens.size() == 0) {
// Common issue: Automatic Semicolon Insertion wrapped the line, so the
// import statement should terminate.
return;
}
if (FormatTok->is(tok::l_brace)) {
FormatTok->BlockKind = BK_Block;
parseBracedList();

@ -520,6 +520,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
Opts.EmitLLVMUseLists = A->getOption().getID() == OPT_emit_llvm_uselists;

Opts.DisableLLVMPasses = Args.hasArg(OPT_disable_llvm_passes);
Opts.DisableLifetimeMarkers = Args.hasArg(OPT_disable_lifetimemarkers);
Opts.DisableRedZone = Args.hasArg(OPT_disable_red_zone);
Opts.ForbidGuardVariables = Args.hasArg(OPT_fforbid_guard_variables);
Opts.UseRegisterSizedBitfieldAccess = Args.hasArg(
@ -127,6 +127,12 @@ GeneratePCHAction::ComputeASTConsumerArguments(CompilerInstance &CI,
return OS;
}

bool GeneratePCHAction::BeginSourceFileAction(CompilerInstance &CI,
StringRef Filename) {
CI.getLangOpts().CompilingPCH = true;
return true;
}

std::unique_ptr<ASTConsumer>
GenerateModuleAction::CreateASTConsumer(CompilerInstance &CI,
StringRef InFile) {

@ -53,6 +53,7 @@ SymbolInfo index::getSymbolInfo(const Decl *D) {
assert(D);
SymbolInfo Info;
Info.Kind = SymbolKind::Unknown;
Info.SubKind = SymbolSubKind::None;
Info.Properties = SymbolPropertySet();
Info.Lang = SymbolLanguage::C;

@ -183,10 +184,16 @@ SymbolInfo index::getSymbolInfo(const Decl *D) {
Info.Kind = SymbolKind::NamespaceAlias;
Info.Lang = SymbolLanguage::CXX;
break;
case Decl::CXXConstructor:
case Decl::CXXConstructor: {
Info.Kind = SymbolKind::Constructor;
Info.Lang = SymbolLanguage::CXX;
auto *CD = cast<CXXConstructorDecl>(D);
if (CD->isCopyConstructor())
Info.SubKind = SymbolSubKind::CXXCopyConstructor;
else if (CD->isMoveConstructor())
Info.SubKind = SymbolSubKind::CXXMoveConstructor;
break;
}
case Decl::CXXDestructor:
Info.Kind = SymbolKind::Destructor;
Info.Lang = SymbolLanguage::CXX;
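A minimal sketch (illustrative, not from the diff) of declarations that now receive the distinct constructor sub-kinds when indexed:

struct S {
  S(const S &other);   // reported with sub-kind "cxx-copy-ctor"
  S(S &&other);        // reported with sub-kind "cxx-move-ctor"
};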
@ -363,6 +370,15 @@ StringRef index::getSymbolKindString(SymbolKind K) {
llvm_unreachable("invalid symbol kind");
}

StringRef index::getSymbolSubKindString(SymbolSubKind K) {
switch (K) {
case SymbolSubKind::None: return "<none>";
case SymbolSubKind::CXXCopyConstructor: return "cxx-copy-ctor";
case SymbolSubKind::CXXMoveConstructor: return "cxx-move-ctor";
}
llvm_unreachable("invalid symbol subkind");
}

StringRef index::getSymbolLanguageString(SymbolLanguage K) {
switch (K) {
case SymbolLanguage::C: return "C";

@ -1996,10 +1996,12 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,

// Ask HeaderInfo if we should enter this #include file. If not, #including
// this file will have no effect.
bool SkipHeader = false;
if (ShouldEnter &&
!HeaderInfo.ShouldEnterIncludeFile(*this, File, isImport,
SuggestedModule.getModule())) {
ShouldEnter = false;
SkipHeader = true;
if (Callbacks)
Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
}
@ -2008,6 +2010,14 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
if (!ShouldEnter) {
// If this is a module import, make it visible if needed.
if (auto *M = SuggestedModule.getModule()) {
// When building a pch, -fmodule-name tells the compiler to textually
// include headers in the specified module. But it is possible that
// ShouldEnter is false because we are skipping the header. In that
// case, we are not importing the specified module.
if (SkipHeader && getLangOpts().CompilingPCH &&
M->getTopLevelModuleName() == getLangOpts().CurrentModule)
return;

makeModuleVisible(M, HashLoc);

if (IncludeTok.getIdentifierInfo()->getPPKeywordID() !=
@ -2032,6 +2042,13 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,

// Determine if we're switching to building a new submodule, and which one.
if (auto *M = SuggestedModule.getModule()) {
// When building a pch, -fmodule-name tells the compiler to textually
// include headers in the specified module. We are not building the
// specified module.
if (getLangOpts().CompilingPCH &&
M->getTopLevelModuleName() == getLangOpts().CurrentModule)
return;

assert(!CurSubmodule && "should not have marked this as a module yet");
CurSubmodule = M;
@ -306,10 +306,11 @@ unsigned Parser::ParseAttributeArgsCommon(

// Parse the non-empty comma-separated list of expressions.
do {
bool ShouldEnter = attributeParsedArgsUnevaluated(*AttrName);
bool Uneval = attributeParsedArgsUnevaluated(*AttrName);
EnterExpressionEvaluationContext Unevaluated(
Actions, Sema::Unevaluated, /*LambdaContextDecl=*/nullptr,
/*IsDecltype=*/false, ShouldEnter);
Actions, Uneval ? Sema::Unevaluated : Sema::ConstantEvaluated,
/*LambdaContextDecl=*/nullptr,
/*IsDecltype=*/false);

ExprResult ArgExpr(
Actions.CorrectDelayedTyposInExpr(ParseAssignmentExpression()));

@ -404,6 +404,10 @@ ExprResult Parser::ParseBraceInitializer() {
return Actions.ActOnInitList(LBraceLoc, None, ConsumeBrace());
}

// Enter an appropriate expression evaluation context for an initializer list.
EnterExpressionEvaluationContext EnterContext(
Actions, EnterExpressionEvaluationContext::InitList);

bool InitExprsOk = true;

while (1) {

@ -1242,7 +1242,8 @@ bool Sema::CheckNeonBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
QualType RHSTy = RHS.get()->getType();

llvm::Triple::ArchType Arch = Context.getTargetInfo().getTriple().getArch();
bool IsPolyUnsigned = Arch == llvm::Triple::aarch64;
bool IsPolyUnsigned = Arch == llvm::Triple::aarch64 ||
Arch == llvm::Triple::aarch64_be;
bool IsInt64Long =
Context.getTargetInfo().getInt64Type() == TargetInfo::SignedLong;
QualType EltTy =
Some files were not shown because too many files have changed in this diff.