Merge llvm, clang, lld and lldb trunk r291476.

Author: dim
Date:   2017-01-09 22:32:19 +00:00
Commit: 618592e561

136 changed files with 4340 additions and 1645 deletions


@ -1491,6 +1491,8 @@ class ScalarEvolution {
void print(raw_ostream &OS) const;
void verify() const;
bool invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv);
/// Collect parametric terms occurring in step expressions (first step of
/// delinearization).


@ -290,7 +290,7 @@ class TargetLibraryInfo {
}
/// Returns extension attribute kind to be used for i32 parameters
/// correpsonding to C-level int or unsigned int. May be zeroext, signext,
/// corresponding to C-level int or unsigned int. May be zeroext, signext,
/// or none.
Attribute::AttrKind getExtAttrForI32Param(bool Signed = true) const {
if (Impl->ShouldExtI32Param)
@ -301,7 +301,7 @@ class TargetLibraryInfo {
}
/// Returns extension attribute kind to be used for i32 return values
/// correpsonding to C-level int or unsigned int. May be zeroext, signext,
/// corresponding to C-level int or unsigned int. May be zeroext, signext,
/// or none.
Attribute::AttrKind getExtAttrForI32Return(bool Signed = true) const {
if (Impl->ShouldExtI32Return)


@ -308,6 +308,16 @@ class MachineBasicBlock
// Iteration support for live in sets. These sets are kept in sorted
// order by their register number.
typedef LiveInVector::const_iterator livein_iterator;
#ifndef NDEBUG
/// Unlike livein_begin, this method does not check that the liveness
/// information is accurate. Still for debug purposes it may be useful
/// to have iterators that won't assert if the liveness information
/// is not current.
livein_iterator livein_begin_dbg() const { return LiveIns.begin(); }
iterator_range<livein_iterator> liveins_dbg() const {
return make_range(livein_begin_dbg(), livein_end());
}
#endif
livein_iterator livein_begin() const;
livein_iterator livein_end() const { return LiveIns.end(); }
bool livein_empty() const { return LiveIns.empty(); }


@ -148,8 +148,7 @@ class MachineFrameInfo {
/// grouping overaligned allocas into a "secondary stack frame" and
/// then only use a single alloca to allocate this frame and only a
/// single virtual register to access it. Currently, without such an
/// optimization, each such alloca gets it's own dynamic
/// realignment.
/// optimization, each such alloca gets its own dynamic realignment.
bool StackRealignable;
/// Whether the function has the \c alignstack attribute.


@ -11,6 +11,7 @@
#define LLVM_DEBUGINFO_MSF_STREAMARRAY_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/DebugInfo/MSF/StreamRef.h"
#include "llvm/Support/Error.h"
#include <cassert>
@ -107,7 +108,10 @@ class VarStreamArray {
Extractor E;
};
template <typename ValueType, typename Extractor> class VarStreamArrayIterator {
template <typename ValueType, typename Extractor>
class VarStreamArrayIterator
: public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>,
std::forward_iterator_tag, ValueType> {
typedef VarStreamArrayIterator<ValueType, Extractor> IterType;
typedef VarStreamArray<ValueType, Extractor> ArrayType;
@ -144,41 +148,39 @@ template <typename ValueType, typename Extractor> class VarStreamArrayIterator {
return false;
}
bool operator!=(const IterType &R) { return !(*this == R); }
const ValueType &operator*() const {
assert(Array && !HasError);
return ThisValue;
}
IterType &operator++() {
// We are done with the current record, discard it so that we are
// positioned at the next record.
IterRef = IterRef.drop_front(ThisLen);
if (IterRef.getLength() == 0) {
// There is nothing after the current record, we must make this an end
// iterator.
moveToEnd();
} else {
// There is some data after the current record.
auto EC = Extract(IterRef, ThisLen, ThisValue);
if (EC) {
consumeError(std::move(EC));
markError();
} else if (ThisLen == 0) {
// An empty record? Make this an end iterator.
IterType &operator+=(std::ptrdiff_t N) {
while (N > 0) {
// We are done with the current record, discard it so that we are
// positioned at the next record.
IterRef = IterRef.drop_front(ThisLen);
if (IterRef.getLength() == 0) {
// There is nothing after the current record, we must make this an end
// iterator.
moveToEnd();
return *this;
} else {
// There is some data after the current record.
auto EC = Extract(IterRef, ThisLen, ThisValue);
if (EC) {
consumeError(std::move(EC));
markError();
return *this;
} else if (ThisLen == 0) {
// An empty record? Make this an end iterator.
moveToEnd();
return *this;
}
}
--N;
}
return *this;
}
IterType operator++(int) {
IterType Original = *this;
++*this;
return Original;
}
private:
void moveToEnd() {
Array = nullptr;
@ -211,6 +213,16 @@ template <typename T> class FixedStreamArray {
assert(Stream.getLength() % sizeof(T) == 0);
}
bool operator==(const FixedStreamArray<T> &Other) const {
return Stream == Other.Stream;
}
bool operator!=(const FixedStreamArray<T> &Other) const {
return !(*this == Other);
}
FixedStreamArray &operator=(const FixedStreamArray &) = default;
const T &operator[](uint32_t Index) const {
assert(Index < size());
uint32_t Off = Index * sizeof(T);
@ -226,6 +238,8 @@ template <typename T> class FixedStreamArray {
uint32_t size() const { return Stream.getLength() / sizeof(T); }
bool empty() const { return size() == 0; }
FixedStreamArrayIterator<T> begin() const {
return FixedStreamArrayIterator<T>(*this, 0);
}
@ -240,36 +254,53 @@ template <typename T> class FixedStreamArray {
ReadableStreamRef Stream;
};
template <typename T> class FixedStreamArrayIterator {
template <typename T>
class FixedStreamArrayIterator
: public iterator_facade_base<FixedStreamArrayIterator<T>,
std::random_access_iterator_tag, T> {
public:
FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index)
: Array(Array), Index(Index) {}
bool operator==(const FixedStreamArrayIterator<T> &R) {
assert(&Array == &R.Array);
return Index == R.Index;
}
bool operator!=(const FixedStreamArrayIterator<T> &R) {
return !(*this == R);
FixedStreamArrayIterator<T> &
operator=(const FixedStreamArrayIterator<T> &Other) {
Array = Other.Array;
Index = Other.Index;
return *this;
}
const T &operator*() const { return Array[Index]; }
FixedStreamArrayIterator<T> &operator++() {
assert(Index < Array.size());
++Index;
bool operator==(const FixedStreamArrayIterator<T> &R) const {
assert(Array == R.Array);
return (Index == R.Index) && (Array == R.Array);
}
FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) {
Index += N;
return *this;
}
FixedStreamArrayIterator<T> operator++(int) {
FixedStreamArrayIterator<T> Original = *this;
++*this;
return Original;
FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) {
assert(Index >= N);
Index -= N;
return *this;
}
std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const {
assert(Array == R.Array);
assert(Index >= R.Index);
return Index - R.Index;
}
bool operator<(const FixedStreamArrayIterator<T> &RHS) const {
assert(Array == RHS.Array);
return Index < RHS.Index;
}
private:
const FixedStreamArray<T> &Array;
FixedStreamArray<T> Array;
uint32_t Index;
};
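For readers unfamiliar with iterator_facade_base: it synthesizes the rest of the iterator surface (operator+, operator-(n), pre/post increment and decrement, operator[], and the remaining comparisons) from the few primitives the derived class defines, which is why the rewritten iterators above only spell out operator*, operator+=, operator-=, operator-, operator==, and operator<. A minimal standalone sketch of the same pattern over a plain int array (the class name is hypothetical):

#include "llvm/ADT/iterator.h"
#include <cstddef>
#include <iterator>

// Hypothetical random-access iterator over a C array of ints. Everything not
// written here (operator+, operator-(n), ++, --, !=, >, <=, >=, operator[])
// is synthesized by iterator_facade_base from these primitives.
class IntArrayIterator
    : public llvm::iterator_facade_base<IntArrayIterator,
                                        std::random_access_iterator_tag, int> {
public:
  explicit IntArrayIterator(int *P) : Ptr(P) {}
  int &operator*() const { return *Ptr; }
  IntArrayIterator &operator+=(std::ptrdiff_t N) { Ptr += N; return *this; }
  IntArrayIterator &operator-=(std::ptrdiff_t N) { Ptr -= N; return *this; }
  std::ptrdiff_t operator-(const IntArrayIterator &R) const {
    return Ptr - R.Ptr;
  }
  bool operator==(const IntArrayIterator &R) const { return Ptr == R.Ptr; }
  bool operator<(const IntArrayIterator &R) const { return Ptr < R.Ptr; }

private:
  int *Ptr;
};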


@ -83,7 +83,7 @@ class SerializationTraits<
namespace remote {
class OrcRemoteTargetRPCAPI
: public rpc::SingleThreadedRPC<rpc::RawByteChannel> {
: public rpc::SingleThreadedRPCEndpoint<rpc::RawByteChannel> {
protected:
class ResourceIdMgr {
public:
@ -108,7 +108,7 @@ class OrcRemoteTargetRPCAPI
public:
// FIXME: Remove constructors once MSVC supports synthesizing move-ops.
OrcRemoteTargetRPCAPI(rpc::RawByteChannel &C)
: rpc::SingleThreadedRPC<rpc::RawByteChannel>(C, true) {}
: rpc::SingleThreadedRPCEndpoint<rpc::RawByteChannel>(C, true) {}
class CallIntVoid
: public rpc::Function<CallIntVoid, int32_t(JITTargetAddress Addr)> {


@ -702,7 +702,7 @@ class CanDeserialize {
/// sync.
template <typename ImplT, typename ChannelT, typename FunctionIdT,
typename SequenceNumberT>
class RPCBase {
class RPCEndpointBase {
protected:
class OrcRPCInvalid : public Function<OrcRPCInvalid, void()> {
public:
@ -747,7 +747,7 @@ class RPCBase {
public:
/// Construct an RPC instance on a channel.
RPCBase(ChannelT &C, bool LazyAutoNegotiation)
RPCEndpointBase(ChannelT &C, bool LazyAutoNegotiation)
: C(C), LazyAutoNegotiation(LazyAutoNegotiation) {
// Hold ResponseId in a special variable, since we expect Response to be
// called relatively frequently, and want to avoid the map lookup.
@ -788,15 +788,21 @@ class RPCBase {
return FnIdOrErr.takeError();
}
// Allocate a sequence number.
auto SeqNo = SequenceNumberMgr.getSequenceNumber();
assert(!PendingResponses.count(SeqNo) &&
"Sequence number already allocated");
SequenceNumberT SeqNo; // initialized in locked scope below.
{
// Lock the pending responses map and sequence number manager.
std::lock_guard<std::mutex> Lock(ResponsesMutex);
// Install the user handler.
PendingResponses[SeqNo] =
// Allocate a sequence number.
SeqNo = SequenceNumberMgr.getSequenceNumber();
assert(!PendingResponses.count(SeqNo) &&
"Sequence number already allocated");
// Install the user handler.
PendingResponses[SeqNo] =
detail::createResponseHandler<ChannelT, typename Func::ReturnType>(
std::move(Handler));
}
// Open the function call message.
if (auto Err = C.startSendMessage(FnId, SeqNo)) {
@ -863,11 +869,33 @@ class RPCBase {
return detail::ReadArgs<ArgTs...>(Args...);
}
/// Abandon all outstanding result handlers.
///
/// This will call all currently registered result handlers to receive an
/// "abandoned" error as their argument. This is used internally by the RPC
/// in error situations, but can also be called directly by clients who are
/// disconnecting from the remote and don't or can't expect responses to their
/// outstanding calls. (Especially for outstanding blocking calls, calling
/// this function may be necessary to avoid dead threads).
void abandonPendingResponses() {
// Lock the pending responses map and sequence number manager.
std::lock_guard<std::mutex> Lock(ResponsesMutex);
for (auto &KV : PendingResponses)
KV.second->abandon();
PendingResponses.clear();
SequenceNumberMgr.reset();
}
protected:
// The LaunchPolicy type allows a launch policy to be specified when adding
// a function handler. See addHandlerImpl.
using LaunchPolicy = std::function<Error(std::function<Error()>)>;
FunctionIdT getInvalidFunctionId() const {
return FnIdAllocator.getInvalidId();
}
/// Add the given handler to the handler map and make it available for
/// autonegotiation and execution.
template <typename Func, typename HandlerT>
@ -884,28 +912,32 @@ class RPCBase {
wrapHandler<Func>(std::move(Handler), std::move(Launch));
}
// Abandon all outstanding results.
void abandonPendingResponses() {
for (auto &KV : PendingResponses)
KV.second->abandon();
PendingResponses.clear();
SequenceNumberMgr.reset();
}
Error handleResponse(SequenceNumberT SeqNo) {
auto I = PendingResponses.find(SeqNo);
if (I == PendingResponses.end()) {
abandonPendingResponses();
return orcError(OrcErrorCode::UnexpectedRPCResponse);
using Handler = typename decltype(PendingResponses)::mapped_type;
Handler PRHandler;
{
// Lock the pending responses map and sequence number manager.
std::unique_lock<std::mutex> Lock(ResponsesMutex);
auto I = PendingResponses.find(SeqNo);
if (I != PendingResponses.end()) {
PRHandler = std::move(I->second);
PendingResponses.erase(I);
SequenceNumberMgr.releaseSequenceNumber(SeqNo);
} else {
// Unlock the pending results map to prevent recursive lock.
Lock.unlock();
abandonPendingResponses();
return orcError(OrcErrorCode::UnexpectedRPCResponse);
}
}
auto PRHandler = std::move(I->second);
PendingResponses.erase(I);
SequenceNumberMgr.releaseSequenceNumber(SeqNo);
assert(PRHandler &&
"If we didn't find a response handler we should have bailed out");
if (auto Err = PRHandler->handleResponse(C)) {
abandonPendingResponses();
SequenceNumberMgr.reset();
return Err;
}
@ -915,7 +947,7 @@ class RPCBase {
FunctionIdT handleNegotiate(const std::string &Name) {
auto I = LocalFunctionIds.find(Name);
if (I == LocalFunctionIds.end())
return FnIdAllocator.getInvalidId();
return getInvalidFunctionId();
return I->second;
}
@ -938,7 +970,7 @@ class RPCBase {
// If autonegotiation indicates that the remote end doesn't support this
// function, return an unknown function error.
if (RemoteId == FnIdAllocator.getInvalidId())
if (RemoteId == getInvalidFunctionId())
return orcError(OrcErrorCode::UnknownRPCFunction);
// Autonegotiation succeeded and returned a valid id. Update the map and
@ -1012,6 +1044,7 @@ class RPCBase {
std::map<FunctionIdT, WrappedHandlerFn> Handlers;
std::mutex ResponsesMutex;
detail::SequenceNumberManager<SequenceNumberT> SequenceNumberMgr;
std::map<SequenceNumberT, std::unique_ptr<detail::ResponseHandler<ChannelT>>>
PendingResponses;
@ -1021,17 +1054,18 @@ class RPCBase {
template <typename ChannelT, typename FunctionIdT = uint32_t,
typename SequenceNumberT = uint32_t>
class MultiThreadedRPC
: public detail::RPCBase<
MultiThreadedRPC<ChannelT, FunctionIdT, SequenceNumberT>, ChannelT,
FunctionIdT, SequenceNumberT> {
class MultiThreadedRPCEndpoint
: public detail::RPCEndpointBase<
MultiThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT> {
private:
using BaseClass =
detail::RPCBase<MultiThreadedRPC<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT>;
detail::RPCEndpointBase<
MultiThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT>;
public:
MultiThreadedRPC(ChannelT &C, bool LazyAutoNegotiation)
MultiThreadedRPCEndpoint(ChannelT &C, bool LazyAutoNegotiation)
: BaseClass(C, LazyAutoNegotiation) {}
/// The LaunchPolicy type allows a launch policy to be specified when adding
@ -1061,30 +1095,41 @@ class MultiThreadedRPC
std::move(Launch));
}
/// Add a class-method as a handler.
template <typename Func, typename ClassT, typename RetT, typename... ArgTs>
void addHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...),
LaunchPolicy Launch = LaunchPolicy()) {
addHandler<Func>(
detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method),
Launch);
}
/// Negotiate a function id for Func with the other end of the channel.
template <typename Func> Error negotiateFunction() {
template <typename Func> Error negotiateFunction(bool Retry = false) {
using OrcRPCNegotiate = typename BaseClass::OrcRPCNegotiate;
// Check if we already have a function id...
auto I = this->RemoteFunctionIds.find(Func::getPrototype());
if (I != this->RemoteFunctionIds.end()) {
// If it's valid there's nothing left to do.
if (I->second != this->getInvalidFunctionId())
return Error::success();
// If it's invalid and we can't re-attempt negotiation, throw an error.
if (!Retry)
return orcError(OrcErrorCode::UnknownRPCFunction);
}
// We don't have a function id for Func yet, call the remote to try to
// negotiate one.
if (auto RemoteIdOrErr = callB<OrcRPCNegotiate>(Func::getPrototype())) {
this->RemoteFunctionIds[Func::getPrototype()] = *RemoteIdOrErr;
if (*RemoteIdOrErr == this->getInvalidFunctionId())
return orcError(OrcErrorCode::UnknownRPCFunction);
return Error::success();
} else
return RemoteIdOrErr.takeError();
}
/// Convenience method for negotiating multiple functions at once.
template <typename Func> Error negotiateFunctions() {
return negotiateFunction<Func>();
}
/// Convenience method for negotiating multiple functions at once.
template <typename Func1, typename Func2, typename... Funcs>
Error negotiateFunctions() {
if (auto Err = negotiateFunction<Func1>())
return Err;
return negotiateFunctions<Func2, Funcs...>();
}
/// Return type for non-blocking call primitives.
template <typename Func>
using NonBlockingCallResult = typename detail::ResultTraits<
@ -1169,19 +1214,20 @@ class MultiThreadedRPC
template <typename ChannelT, typename FunctionIdT = uint32_t,
typename SequenceNumberT = uint32_t>
class SingleThreadedRPC
: public detail::RPCBase<
SingleThreadedRPC<ChannelT, FunctionIdT, SequenceNumberT>, ChannelT,
FunctionIdT, SequenceNumberT> {
class SingleThreadedRPCEndpoint
: public detail::RPCEndpointBase<
SingleThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT> {
private:
using BaseClass =
detail::RPCBase<SingleThreadedRPC<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT>;
detail::RPCEndpointBase<
SingleThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT>;
using LaunchPolicy = typename BaseClass::LaunchPolicy;
public:
SingleThreadedRPC(ChannelT &C, bool LazyAutoNegotiation)
SingleThreadedRPCEndpoint(ChannelT &C, bool LazyAutoNegotiation)
: BaseClass(C, LazyAutoNegotiation) {}
template <typename Func, typename HandlerT>
@ -1197,29 +1243,31 @@ class SingleThreadedRPC
}
/// Negotiate a function id for Func with the other end of the channel.
template <typename Func> Error negotiateFunction() {
template <typename Func> Error negotiateFunction(bool Retry = false) {
using OrcRPCNegotiate = typename BaseClass::OrcRPCNegotiate;
// Check if we already have a function id...
auto I = this->RemoteFunctionIds.find(Func::getPrototype());
if (I != this->RemoteFunctionIds.end()) {
// If it's valid there's nothing left to do.
if (I->second != this->getInvalidFunctionId())
return Error::success();
// If it's invalid and we can't re-attempt negotiation, throw an error.
if (!Retry)
return orcError(OrcErrorCode::UnknownRPCFunction);
}
// We don't have a function id for Func yet, call the remote to try to
// negotiate one.
if (auto RemoteIdOrErr = callB<OrcRPCNegotiate>(Func::getPrototype())) {
this->RemoteFunctionIds[Func::getPrototype()] = *RemoteIdOrErr;
if (*RemoteIdOrErr == this->getInvalidFunctionId())
return orcError(OrcErrorCode::UnknownRPCFunction);
return Error::success();
} else
return RemoteIdOrErr.takeError();
}
/// Convenience method for negotiating multiple functions at once.
template <typename Func> Error negotiateFunctions() {
return negotiateFunction<Func>();
}
/// Convenience method for negotiating multiple functions at once.
template <typename Func1, typename Func2, typename... Funcs>
Error negotiateFunctions() {
if (auto Err = negotiateFunction<Func1>())
return Err;
return negotiateFunctions<Func2, Funcs...>();
}
template <typename Func, typename... ArgTs,
typename AltRetT = typename Func::ReturnType>
typename detail::ResultTraits<AltRetT>::ErrorReturnType
@ -1332,6 +1380,68 @@ template <typename RPCClass> class ParallelCallGroup {
uint32_t NumOutstandingCalls;
};
/// @brief Convenience class for grouping RPC Functions into APIs that can be
/// negotiated as a block.
///
template <typename... Funcs>
class APICalls {
public:
/// @brief Test whether this API contains Function F.
template <typename F>
class Contains {
public:
static const bool value = false;
};
/// @brief Negotiate all functions in this API.
template <typename RPCEndpoint>
static Error negotiate(RPCEndpoint &R) {
return Error::success();
}
};
template <typename Func, typename... Funcs>
class APICalls<Func, Funcs...> {
public:
template <typename F>
class Contains {
public:
static const bool value = std::is_same<F, Func>::value |
APICalls<Funcs...>::template Contains<F>::value;
};
template <typename RPCEndpoint>
static Error negotiate(RPCEndpoint &R) {
if (auto Err = R.template negotiateFunction<Func>())
return Err;
return APICalls<Funcs...>::negotiate(R);
}
};
template <typename... InnerFuncs, typename... Funcs>
class APICalls<APICalls<InnerFuncs...>, Funcs...> {
public:
template <typename F>
class Contains {
public:
static const bool value =
APICalls<InnerFuncs...>::template Contains<F>::value |
APICalls<Funcs...>::template Contains<F>::value;
};
template <typename RPCEndpoint>
static Error negotiate(RPCEndpoint &R) {
if (auto Err = APICalls<InnerFuncs...>::negotiate(R))
return Err;
return APICalls<Funcs...>::negotiate(R);
}
};
} // end namespace rpc
} // end namespace orc
} // end namespace llvm
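As a rough usage sketch of the new APICalls grouping (the RPC functions below are hypothetical, and rpc::Function subclasses are assumed to provide the usual static getName()):

#include "llvm/ExecutionEngine/Orc/RPCUtils.h"
#include <cstdint>

using namespace llvm;
using namespace llvm::orc;

// Two hypothetical RPC functions; only the grouping pattern matters here.
class Negate : public rpc::Function<Negate, int32_t(int32_t)> {
public:
  static const char *getName() { return "Negate"; }
};

class Halt : public rpc::Function<Halt, void()> {
public:
  static const char *getName() { return "Halt"; }
};

using MathAPI = rpc::APICalls<Negate>;
using ControlAPI = rpc::APICalls<Halt>;
// APICalls instances nest, so whole APIs compose into larger ones.
using FullAPI = rpc::APICalls<MathAPI, ControlAPI>;

static_assert(FullAPI::Contains<Negate>::value, "Negate is part of FullAPI");

// Negotiate ids for every function in the API with one call.
template <typename RPCEndpointT> Error setupEndpoint(RPCEndpointT &EP) {
  return FullAPI::negotiate(EP);
}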


@ -48,9 +48,7 @@ class RawByteChannel {
template <typename FunctionIdT, typename SequenceIdT>
Error startSendMessage(const FunctionIdT &FnId, const SequenceIdT &SeqNo) {
writeLock.lock();
if (auto Err = serializeSeq(*this, FnId, SeqNo))
return Err;
return Error::success();
return serializeSeq(*this, FnId, SeqNo);
}
/// Notify the channel that we're ending a message send.


@ -28,14 +28,14 @@ template <> struct ScalarEnumerationTraits<TypeTestResolution::Kind> {
template <> struct MappingTraits<TypeTestResolution> {
static void mapping(IO &io, TypeTestResolution &res) {
io.mapRequired("Kind", res.TheKind);
io.mapRequired("SizeBitWidth", res.SizeBitWidth);
io.mapOptional("Kind", res.TheKind);
io.mapOptional("SizeBitWidth", res.SizeBitWidth);
}
};
template <> struct MappingTraits<TypeIdSummary> {
static void mapping(IO &io, TypeIdSummary& summary) {
io.mapRequired("TTRes", summary.TTRes);
io.mapOptional("TTRes", summary.TTRes);
}
};
@ -53,7 +53,7 @@ namespace yaml {
template <> struct MappingTraits<FunctionSummaryYaml> {
static void mapping(IO &io, FunctionSummaryYaml& summary) {
io.mapRequired("TypeTests", summary.TypeTests);
io.mapOptional("TypeTests", summary.TypeTests);
}
};
@ -100,8 +100,8 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
template <> struct MappingTraits<ModuleSummaryIndex> {
static void mapping(IO &io, ModuleSummaryIndex& index) {
io.mapRequired("GlobalValueMap", index.GlobalValueMap);
io.mapRequired("TypeIdMap", index.TypeIdMap);
io.mapOptional("GlobalValueMap", index.GlobalValueMap);
io.mapOptional("TypeIdMap", index.TypeIdMap);
}
};
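The practical effect of the mapRequired-to-mapOptional switch is that a document missing these keys now parses cleanly, with the fields keeping their pre-parse values, instead of raising an error. A minimal sketch with a hypothetical struct:

#include "llvm/Support/YAMLTraits.h"

struct Point {
  int X = 0;
  int Y = 0;
};

namespace llvm {
namespace yaml {
// With mapOptional, parsing "{}" (or "X: 3" alone) succeeds and leaves the
// unmentioned fields at their defaults; mapRequired would reject such input.
template <> struct MappingTraits<Point> {
  static void mapping(IO &io, Point &P) {
    io.mapOptional("X", P.X);
    io.mapOptional("Y", P.Y);
  }
};
} // end namespace yaml
} // end namespace llvm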


@ -879,18 +879,22 @@ extern template class AnalysisManager<Function>;
/// \brief Convenience typedef for the Function analysis manager.
typedef AnalysisManager<Function> FunctionAnalysisManager;
/// \brief A module analysis which acts as a proxy for a function analysis
/// manager.
/// \brief An analysis over an "outer" IR unit that provides access to an
/// analysis manager over an "inner" IR unit. The inner unit must be contained
/// in the outer unit.
///
/// This primarily proxies invalidation information from the module analysis
/// manager and module pass manager to a function analysis manager. You should
/// never use a function analysis manager from within (transitively) a module
/// pass manager unless your parent module pass has received a proxy result
/// object for it.
/// For example, InnerAnalysisManagerProxy<FunctionAnalysisManager, Module> is
/// an analysis over Modules (the "outer" unit) that provides access to a
/// Function analysis manager. The FunctionAnalysisManager is the "inner"
/// manager being proxied, and Functions are the "inner" unit. The inner/outer
/// relationship is valid because each Function is contained in one Module.
///
/// Note that the proxy's result is a move-only object and represents ownership
/// of the validity of the analyses in the \c FunctionAnalysisManager it
/// provides.
/// If you're (transitively) within a pass manager for an IR unit U that
/// contains IR unit V, you should never use an analysis manager over V, except
/// via one of these proxies.
///
/// Note that the proxy's result is a move-only RAII object. The validity of
/// the analyses in the inner analysis manager is tied to its lifetime.
template <typename AnalysisManagerT, typename IRUnitT, typename... ExtraArgTs>
class InnerAnalysisManagerProxy
: public AnalysisInfoMixin<
@ -926,23 +930,16 @@ class InnerAnalysisManagerProxy
/// \brief Accessor for the analysis manager.
AnalysisManagerT &getManager() { return *InnerAM; }
/// \brief Handler for invalidation of the outer IR unit.
/// \brief Handler for invalidation of the outer IR unit, \c IRUnitT.
///
/// If this analysis itself is preserved, then we assume that the set of \c
/// IR units that the inner analysis manager controls hasn't changed and
/// thus we don't need to invalidate *all* cached data associated with any
/// \c IRUnitT* in the \c AnalysisManagerT.
/// If the proxy analysis itself is not preserved, we assume that the set of
/// inner IR objects contained in IRUnit may have changed. In this case,
/// we have to call \c clear() on the inner analysis manager, as it may now
/// have stale pointers to its inner IR objects.
///
/// Regardless of whether this analysis is marked as preserved, all of the
/// analyses in the \c AnalysisManagerT are potentially invalidated (for
/// the relevant inner set of their IR units) based on the set of preserved
/// analyses.
///
/// Because this needs to understand the mapping from one IR unit to an
/// inner IR unit, this method isn't defined in the primary template.
/// Instead, each specialization of this template will need to provide an
/// explicit specialization of this method to handle that particular pair
/// of IR unit and inner AnalysisManagerT.
/// Regardless of whether the proxy analysis is marked as preserved, all of
/// the analyses in the inner analysis manager are potentially invalidated
/// based on the set of preserved analyses.
bool invalidate(
IRUnitT &IR, const PreservedAnalyses &PA,
typename AnalysisManager<IRUnitT, ExtraArgTs...>::Invalidator &Inv);
@ -956,13 +953,9 @@ class InnerAnalysisManagerProxy
/// \brief Run the analysis pass and create our proxy result object.
///
/// This doesn't do any interesting work, it is primarily used to insert our
/// proxy result object into the module analysis cache so that we can proxy
/// invalidation to the function analysis manager.
///
/// In debug builds, it will also assert that the analysis manager is empty
/// as no queries should arrive at the function analysis manager prior to
/// this analysis being requested.
/// This doesn't do any interesting work; it is primarily used to insert our
/// proxy result object into the outer analysis cache so that we can proxy
/// invalidation to the inner analysis manager.
Result run(IRUnitT &IR, AnalysisManager<IRUnitT, ExtraArgTs...> &AM,
ExtraArgTs...) {
return Result(*InnerAM);
@ -996,22 +989,24 @@ bool FunctionAnalysisManagerModuleProxy::Result::invalidate(
extern template class InnerAnalysisManagerProxy<FunctionAnalysisManager,
Module>;
/// \brief A function analysis which acts as a proxy for a module analysis
/// manager.
/// \brief An analysis over an "inner" IR unit that provides access to an
/// analysis manager over an "outer" IR unit. The inner unit must be contained
/// in the outer unit.
///
/// This primarily provides an accessor to a parent module analysis manager to
/// function passes. Only the const interface of the module analysis manager is
/// provided to indicate that once inside of a function analysis pass you
/// cannot request a module analysis to actually run. Instead, the user must
/// rely on the \c getCachedResult API.
/// For example OuterAnalysisManagerProxy<ModuleAnalysisManager, Function> is an
/// analysis over Functions (the "inner" unit) which provides access to a Module
/// analysis manager. The ModuleAnalysisManager is the "outer" manager being
/// proxied, and Modules are the "outer" IR unit. The inner/outer relationship
/// is valid because each Function is contained in one Module.
///
/// The invalidation provided by this proxy involves tracking when an
/// invalidation event in the outer analysis manager needs to trigger an
/// invalidation of a particular analysis on this IR unit.
/// This proxy only exposes the const interface of the outer analysis manager,
/// to indicate that you cannot cause an outer analysis to run from within an
/// inner pass. Instead, you must rely on the \c getCachedResult API.
///
/// Because outer analyses aren't invalidated while these IR units are being
/// precessed, we have to register and handle these as deferred invalidation
/// events.
/// This proxy doesn't manage invalidation in any way -- that is handled by the
/// recursive return path of each layer of the pass manager. A consequence of
/// this is the outer analyses may be stale. We invalidate the outer analyses
/// only when we're done running passes over the inner IR units.
template <typename AnalysisManagerT, typename IRUnitT, typename... ExtraArgTs>
class OuterAnalysisManagerProxy
: public AnalysisInfoMixin<
@ -1024,7 +1019,7 @@ class OuterAnalysisManagerProxy
const AnalysisManagerT &getManager() const { return *AM; }
/// \brief Handle invalidation by ignoring it, this pass is immutable.
/// \brief Handle invalidation by ignoring it; this pass is immutable.
bool invalidate(
IRUnitT &, const PreservedAnalyses &,
typename AnalysisManager<IRUnitT, ExtraArgTs...>::Invalidator &) {
@ -1089,18 +1084,15 @@ AnalysisKey
extern template class OuterAnalysisManagerProxy<ModuleAnalysisManager,
Function>;
/// Provide the \c ModuleAnalysisManager to \c Fucntion proxy.
/// Provide the \c ModuleAnalysisManager to \c Function proxy.
typedef OuterAnalysisManagerProxy<ModuleAnalysisManager, Function>
ModuleAnalysisManagerFunctionProxy;
/// \brief Trivial adaptor that maps from a module to its functions.
///
/// Designed to allow composition of a FunctionPass(Manager) and
/// a ModulePassManager. Note that if this pass is constructed with a pointer
/// to a \c ModuleAnalysisManager it will run the
/// \c FunctionAnalysisManagerModuleProxy analysis prior to running the function
/// pass over the module to enable a \c FunctionAnalysisManager to be used
/// within this run safely.
/// a ModulePassManager, by running the FunctionPass(Manager) over every
/// function in the module.
///
/// Function passes run within this adaptor can rely on having exclusive access
/// to the function they are run over. They should not read or modify any other
@ -1115,6 +1107,10 @@ typedef OuterAnalysisManagerProxy<ModuleAnalysisManager, Function>
/// module.
/// FIXME: Make the above true for all of LLVM's actual passes, some still
/// violate this principle.
///
/// Note that although function passes can access module analyses, module
/// analyses are not invalidated while the function passes are running, so they
/// may be stale. Function analyses will not be stale.
template <typename FunctionPassT>
class ModuleToFunctionPassAdaptor
: public PassInfoMixin<ModuleToFunctionPassAdaptor<FunctionPassT>> {
@ -1124,7 +1120,6 @@ class ModuleToFunctionPassAdaptor
/// \brief Runs the function pass across every function in the module.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
// Setup the function analysis manager from its proxy.
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
@ -1145,10 +1140,11 @@ class ModuleToFunctionPassAdaptor
PA.intersect(std::move(PassPA));
}
// By definition we preserve the proxy. We also preserve all analyses on
// Function units. This precludes *any* invalidation of function analyses
// by the proxy, but that's OK because we've taken care to invalidate
// analyses in the function analysis manager incrementally above.
// The FunctionAnalysisManagerModuleProxy is preserved because (we assume)
// the function passes we ran didn't add or remove any functions.
//
// We also preserve all analyses on Functions, because we did all the
// invalidation we needed to do above.
PA.preserveSet<AllAnalysesOn<Function>>();
PA.preserve<FunctionAnalysisManagerModuleProxy>();
return PA;
@ -1166,7 +1162,7 @@ createModuleToFunctionPassAdaptor(FunctionPassT Pass) {
return ModuleToFunctionPassAdaptor<FunctionPassT>(std::move(Pass));
}
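A short usage sketch of the adaptor (the function pass chosen is arbitrary; any new-PM function pass works):

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"

using namespace llvm;

PreservedAnalyses runOverModule(Module &M, ModuleAnalysisManager &MAM) {
  ModulePassManager MPM;
  // Wrap a function pass so it runs over every function in the module. The
  // adaptor pulls the FunctionAnalysisManager out of the
  // FunctionAnalysisManagerModuleProxy result internally.
  MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass()));
  return MPM.run(M, MAM);
}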
/// \brief A template utility pass to force an analysis result to be available.
/// \brief A utility pass template to force an analysis result to be available.
///
/// If there are extra arguments at the pass's run level there may also be
/// extra arguments to the analysis manager's \c getResult routine. We can't
@ -1196,17 +1192,14 @@ struct RequireAnalysisPass
}
};
/// \brief A template utility pass to force an analysis result to be
/// invalidated.
///
/// This is a no-op pass which simply forces a specific analysis result to be
/// invalidated when it is run.
/// \brief A no-op pass template which simply forces a specific analysis result
/// to be invalidated.
template <typename AnalysisT>
struct InvalidateAnalysisPass
: PassInfoMixin<InvalidateAnalysisPass<AnalysisT>> {
/// \brief Run this pass over some unit of IR.
///
/// This pass can be run over any unit of IR and use any analysis manager
/// This pass can be run over any unit of IR and use any analysis manager,
/// provided they satisfy the basic API requirements. When this pass is
/// created, these methods can be instantiated to satisfy whatever the
/// context requires.
@ -1218,10 +1211,10 @@ struct InvalidateAnalysisPass
}
};
/// \brief A utility pass that does nothing but preserves no analyses.
/// \brief A utility pass that does nothing, but preserves no analyses.
///
/// As a consequence fo not preserving any analyses, this pass will force all
/// analysis passes to be re-run to produce fresh results if any are needed.
/// Because this preserves no analyses, any analysis passes queried after this
/// pass runs will recompute fresh results.
struct InvalidateAllAnalysesPass : PassInfoMixin<InvalidateAllAnalysesPass> {
/// \brief Run this pass over some unit of IR.
template <typename IRUnitT, typename AnalysisManagerT, typename... ExtraArgTs>


@ -238,6 +238,26 @@ class User : public Value {
return make_range(value_op_begin(), value_op_end());
}
struct const_value_op_iterator
: iterator_adaptor_base<const_value_op_iterator, const_op_iterator,
std::random_access_iterator_tag, const Value *,
ptrdiff_t, const Value *, const Value *> {
explicit const_value_op_iterator(const Use *U = nullptr) :
iterator_adaptor_base(U) {}
const Value *operator*() const { return *I; }
const Value *operator->() const { return operator*(); }
};
const_value_op_iterator value_op_begin() const {
return const_value_op_iterator(op_begin());
}
const_value_op_iterator value_op_end() const {
return const_value_op_iterator(op_end());
}
iterator_range<const_value_op_iterator> operand_values() const {
return make_range(value_op_begin(), value_op_end());
}
/// \brief Drop all references to operands.
///
/// This function is in charge of "letting go" of all objects that this User


@ -207,6 +207,14 @@ void native(const Twine &path, SmallVectorImpl<char> &result);
/// @param path A path that is transformed to native format.
void native(SmallVectorImpl<char> &path);
/// @brief Replaces backslashes with slashes on Windows.
///
/// @param path The path to transform.
/// @result The result of replacing backslashes with forward slashes on
/// Windows. On Unix, this function is a no-op because backslashes are valid
/// path characters.
std::string convert_to_slash(StringRef path);
/// @}
/// @name Lexical Observers
/// @{


@ -215,9 +215,20 @@ ModulePass *createMetaRenamerPass();
/// manager.
ModulePass *createBarrierNoopPass();
/// What to do with the summary when running the LowerTypeTests pass.
enum class LowerTypeTestsSummaryAction {
None, ///< Do nothing.
Import, ///< Import typeid resolutions from summary and globals.
Export, ///< Export typeid resolutions to summary and globals.
};
/// \brief This pass lowers type metadata and the llvm.type.test intrinsic to
/// bitsets.
ModulePass *createLowerTypeTestsPass();
/// \param Action What to do with the summary passed as Index.
/// \param Index The summary to use for importing or exporting, this can be null
/// when Action is None.
ModulePass *createLowerTypeTestsPass(LowerTypeTestsSummaryAction Action,
ModuleSummaryIndex *Index);
/// \brief This pass export CFI checks for use by external modules.
ModulePass *createCrossDSOCFIPass();


@ -21,7 +21,6 @@
#include <vector>
namespace llvm {
class ModuleSummaryIndex;
class Pass;
class TargetLibraryInfoImpl;
class TargetMachine;


@ -1106,6 +1106,16 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse))
return V;
// udiv %V, C -> 0 if %V < C
if (MaxRecurse) {
if (Constant *C = dyn_cast_or_null<Constant>(SimplifyICmpInst(
ICmpInst::ICMP_ULT, Op0, Op1, Q, MaxRecurse - 1))) {
if (C->isAllOnesValue()) {
return Constant::getNullValue(Op0->getType());
}
}
}
return nullptr;
}
@ -1247,6 +1257,16 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse))
return V;
// urem %V, C -> %V if %V < C
if (MaxRecurse) {
if (Constant *C = dyn_cast_or_null<Constant>(SimplifyICmpInst(
ICmpInst::ICMP_ULT, Op0, Op1, Q, MaxRecurse - 1))) {
if (C->isAllOnesValue()) {
return Op0;
}
}
}
return nullptr;
}
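Both folds rest on the same fact about unsigned arithmetic: whenever %V u< C holds, the quotient is zero and the remainder is the dividend itself. Spelled out with plain integers:

#include <cassert>
#include <cstdint>

int main() {
  // If V < C (unsigned), then V / C == 0 and V % C == V -- exactly the
  // values the SimplifyUDivInst and SimplifyURemInst folds above return.
  uint32_t V = 7, C = 19;
  assert(V < C);
  assert(V / C == 0);
  assert(V % C == V);
  return 0;
}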


@ -179,9 +179,9 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const {
}
bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const {
// For each block we check that it doesn't have any uses outside of it's
// innermost loop. This process will transitivelly guarntee that current loop
// and all of the nested loops are in the LCSSA form.
// For each block we check that it doesn't have any uses outside of its
// innermost loop. This process will transitively guarantee that the current
// loop and all of the nested loops are in LCSSA form.
return all_of(this->blocks(), [&](const BasicBlock *BB) {
return isBlockInLCSSAForm(*LI.getLoopFor(BB), *BB, DT);
});


@ -344,38 +344,24 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
if (!InvariantGroupMD)
return MemDepResult::getUnknown();
Value *LoadOperand = LI->getPointerOperand();
// Take the ptr operand after all casts and zero-index GEPs. This way we
// only need to search the cast graph downward.
Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
// It is not safe to walk the use list of a global value, because function
// passes aren't allowed to look outside their functions.
// FIXME: this could be fixed by filtering instructions from outside
// of the current function.
if (isa<GlobalValue>(LoadOperand))
return MemDepResult::getUnknown();
// Queue to process all pointers that are equivalent to load operand.
SmallVector<const Value *, 8> LoadOperandsQueue;
SmallSet<const Value *, 14> SeenValues;
auto TryInsertToQueue = [&](Value *V) {
if (SeenValues.insert(V).second)
LoadOperandsQueue.push_back(V);
};
TryInsertToQueue(LoadOperand);
LoadOperandsQueue.push_back(LoadOperand);
while (!LoadOperandsQueue.empty()) {
const Value *Ptr = LoadOperandsQueue.pop_back_val();
assert(Ptr);
if (isa<GlobalValue>(Ptr))
continue;
// Value comes from bitcast: Ptr = bitcast x. Insert x.
if (auto *BCI = dyn_cast<BitCastInst>(Ptr))
TryInsertToQueue(BCI->getOperand(0));
// Gep with zeros is equivalent to bitcast.
// FIXME: we are not sure if some bitcast should be canonicalized to gep 0
// or gep 0 to bitcast because of SROA, so there are 2 forms. When typeless
// pointers will be upstream then both cases will be gone (and this BFS
// also won't be needed).
if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr))
if (GEP->hasAllZeroIndices())
TryInsertToQueue(GEP->getOperand(0));
assert(Ptr && !isa<GlobalValue>(Ptr) &&
"Null or GlobalValue should not be inserted");
for (const Use &Us : Ptr->uses()) {
auto *U = dyn_cast<Instruction>(Us.getUser());
@ -385,13 +371,17 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
// Bitcast or gep with zeros are using Ptr. Add to queue to check its
// users. U = bitcast Ptr
if (isa<BitCastInst>(U)) {
TryInsertToQueue(U);
LoadOperandsQueue.push_back(U);
continue;
}
// U = getelementptr Ptr, 0, 0...
// Gep with zeros is equivalent to bitcast.
// FIXME: we are not sure if some bitcast should be canonicalized to gep 0
// or gep 0 to bitcast because of SROA, so there are 2 forms. When
// typeless pointers will be ready then both cases will be gone
// (and this BFS also won't be needed).
if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
if (GEP->hasAllZeroIndices()) {
TryInsertToQueue(U);
LoadOperandsQueue.push_back(U);
continue;
}


@ -10012,6 +10012,18 @@ void ScalarEvolution::verify() const {
// TODO: Verify more things.
}
bool ScalarEvolution::invalidate(
Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv) {
// Invalidate the ScalarEvolution object whenever it isn't preserved or one
// of its dependencies is invalidated.
auto PAC = PA.getChecker<ScalarEvolutionAnalysis>();
return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
Inv.invalidate<AssumptionAnalysis>(F, PA) ||
Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
Inv.invalidate<LoopAnalysis>(F, PA);
}
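The flip side for pass authors: a transform that wants a cached ScalarEvolution to survive must preserve not just SCEV but all three analyses it depends on, since invalidation of any one of them now invalidates SCEV as well. A sketch of the corresponding preservation set:

// Sketch: the PreservedAnalyses a transform would return to keep a cached
// ScalarEvolution alive. Dropping any of the last three from this set causes
// ScalarEvolution::invalidate above to discard the SCEV result too.
static PreservedAnalyses preserveSCEVAndFriends() {
  PreservedAnalyses PA;
  PA.preserve<ScalarEvolutionAnalysis>();
  PA.preserve<AssumptionAnalysis>();
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<LoopAnalysis>();
  return PA;
}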
AnalysisKey ScalarEvolutionAnalysis::Key;
ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,


@ -3257,6 +3257,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
case Intrinsic::dbg_value:
return true;
case Intrinsic::bitreverse:
case Intrinsic::bswap:
case Intrinsic::ctlz:
case Intrinsic::ctpop:


@ -429,7 +429,7 @@ class MetadataLoader::MetadataLoaderImpl {
/// Populate the index above to enable lazily loading of metadata, and load
/// the named metadata as well as the transitively referenced global
/// Metadata.
Expected<bool> lazyLoadModuleMetadataBlock(PlaceholderQueue &Placeholders);
Expected<bool> lazyLoadModuleMetadataBlock();
/// On-demand loading of a single metadata. Requires the index above to be
/// populated.
@ -516,8 +516,8 @@ Error error(const Twine &Message) {
Message, make_error_code(BitcodeError::CorruptedBitcode));
}
Expected<bool> MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock(
PlaceholderQueue &Placeholders) {
Expected<bool>
MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
IndexCursor = Stream;
SmallVector<uint64_t, 64> Record;
// Get the abbrevs, and preload record positions to make them lazy-loadable.
@ -701,7 +701,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
// then load individual record as needed, starting with the named metadata.
if (ModuleLevel && IsImporting && MetadataList.empty() &&
!DisableLazyLoading) {
auto SuccessOrErr = lazyLoadModuleMetadataBlock(Placeholders);
auto SuccessOrErr = lazyLoadModuleMetadataBlock();
if (!SuccessOrErr)
return SuccessOrErr.takeError();
if (SuccessOrErr.get()) {
@ -1561,7 +1561,6 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
return error("Invalid record");
SmallVector<uint64_t, 64> Record;
PlaceholderQueue Placeholders;
while (true) {
@ -1608,10 +1607,12 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
auto Idx = Record[i + 1];
if (Idx < (MDStringRef.size() + GlobalMetadataBitPosIndex.size()) &&
!MetadataList.lookup(Idx))
!MetadataList.lookup(Idx)) {
// Load the attachment if it is in the lazy-loadable range and hasn't
// been loaded yet.
lazyLoadOneMetadata(Idx, Placeholders);
resolveForwardRefsAndPlaceholders(Placeholders);
}
Metadata *Node = MetadataList.getMetadataFwdRef(Idx);
if (isa<LocalAsMetadata>(Node))


@ -1714,7 +1714,7 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
EVT CCT = getSetCCResultType(NVT);
// Hi part is always the same op
Hi = DAG.getNode(N->getOpcode(), DL, {NVT, NVT}, {LHSH, RHSH});
Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH});
// We need to know whether to select Lo part that corresponds to 'winning'
// Hi part or if Hi parts are equal.
@ -1725,7 +1725,7 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL);
// Recursed Lo part if Hi parts are equal, this uses unsigned version
SDValue LoMinMax = DAG.getNode(LoOpc, DL, {NVT, NVT}, {LHSL, RHSL});
SDValue LoMinMax = DAG.getNode(LoOpc, DL, NVT, {LHSL, RHSL});
Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp);
}


@ -381,7 +381,6 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
I != E; ++I) {
if (DCELimit != -1 && (int)NumDead >= DCELimit)
break;
int FirstSS, SecondSS;
if (TII->isStackSlotCopy(*I, FirstSS, SecondSS) && FirstSS == SecondSS &&
FirstSS != -1) {
@ -392,12 +391,18 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
}
MachineBasicBlock::iterator NextMI = std::next(I);
if (NextMI == MBB->end()) continue;
MachineBasicBlock::iterator ProbableLoadMI = I;
unsigned LoadReg = 0;
unsigned StoreReg = 0;
if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS)))
continue;
// Skip the ...pseudo debugging... instructions between a load and store.
while ((NextMI != E) && NextMI->isDebugValue()) {
++NextMI;
++I;
}
if (NextMI == E) continue;
if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS)))
continue;
if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
@ -407,7 +412,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
if (NextMI->findRegisterUseOperandIdx(LoadReg, true, nullptr) != -1) {
++NumDead;
toErase.push_back(&*I);
toErase.push_back(&*ProbableLoadMI);
}
toErase.push_back(&*NextMI);


@ -896,6 +896,48 @@ uint32_t RuntimeDyldELF::getMatchingLoRelocation(uint32_t RelType,
return ELF::R_MIPS_NONE;
}
// Sometimes we don't need to create a thunk for a branch. This typically
// happens when the branch target is located in the same object file. In
// that case the target is either a weak symbol or a symbol in a different
// executable section. This function checks whether the branch target is
// located in the same object file and whether the distance between source
// and target fits the R_AARCH64_CALL26 relocation. If both conditions are
// met, it emits a direct jump to the target and returns true. Otherwise it
// returns false and a thunk is created.
bool RuntimeDyldELF::resolveAArch64ShortBranch(
unsigned SectionID, relocation_iterator RelI,
const RelocationValueRef &Value) {
uint64_t Address;
if (Value.SymbolName) {
auto Loc = GlobalSymbolTable.find(Value.SymbolName);
// Don't create direct branch for external symbols.
if (Loc == GlobalSymbolTable.end())
return false;
const auto &SymInfo = Loc->second;
Address =
uint64_t(Sections[SymInfo.getSectionID()].getLoadAddressWithOffset(
SymInfo.getOffset()));
} else {
Address = uint64_t(Sections[Value.SectionID].getLoadAddress());
}
uint64_t Offset = RelI->getOffset();
uint64_t SourceAddress = Sections[SectionID].getLoadAddressWithOffset(Offset);
// R_AARCH64_CALL26 requires immediate to be in range -2^27 <= imm < 2^27
// If distance between source and target is out of range then we should
// create thunk.
if (!isInt<28>(Address + Value.Addend - SourceAddress))
return false;
resolveRelocation(Sections[SectionID], Offset, Address, RelI->getType(),
Value.Addend);
return true;
}
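For reference, the isInt<28> test encodes the branch-range rule stated in the comment: the CALL26 immediate is a signed 26-bit count of 4-byte words, so the reachable byte displacement is [-2^27, 2^27). Restated as a standalone check:

#include <cstdint>

// Equivalent of isInt<28>(Displacement): the byte distance a direct
// R_AARCH64_CALL26 branch can cover is a signed 28-bit quantity, because
// the 26-bit word immediate is shifted left by two.
static bool fitsInCall26(int64_t Displacement) {
  return Displacement >= -(int64_t(1) << 27) &&
         Displacement < (int64_t(1) << 27);
}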
Expected<relocation_iterator>
RuntimeDyldELF::processRelocationRef(
unsigned SectionID, relocation_iterator RelI, const ObjectFile &O,
@ -1003,7 +1045,7 @@ RuntimeDyldELF::processRelocationRef(
(uint64_t)Section.getAddressWithOffset(i->second),
RelType, 0);
DEBUG(dbgs() << " Stub function found\n");
} else {
} else if (!resolveAArch64ShortBranch(SectionID, RelI, Value)) {
// Create a new stub function.
DEBUG(dbgs() << " Create a new stub function\n");
Stubs[Value] = Section.getStubOffset();


@ -40,6 +40,9 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
void resolveAArch64Relocation(const SectionEntry &Section, uint64_t Offset,
uint64_t Value, uint32_t Type, int64_t Addend);
bool resolveAArch64ShortBranch(unsigned SectionID, relocation_iterator RelI,
const RelocationValueRef &Value);
void resolveARMRelocation(const SectionEntry &Section, uint64_t Offset,
uint32_t Value, uint32_t Type, int32_t Addend);


@ -196,8 +196,15 @@ crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index,
};
FunctionImporter Importer(Index, Loader);
if (!Importer.importFunctions(TheModule, ImportList))
Expected<bool> Result = Importer.importFunctions(TheModule, ImportList);
if (!Result) {
handleAllErrors(Result.takeError(), [&](ErrorInfoBase &EIB) {
SMDiagnostic Err = SMDiagnostic(TheModule.getModuleIdentifier(),
SourceMgr::DK_Error, EIB.message());
Err.print("ThinLTO", errs());
});
report_fatal_error("importFunctions failed");
}
}
static void optimizeModule(Module &TheModule, TargetMachine &TM,


@ -2823,7 +2823,11 @@ StringRef MachORebaseEntry::typeName() const {
}
bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const {
#ifdef EXPENSIVE_CHECKS
assert(Opcodes == Other.Opcodes && "compare iterators of different files");
#else
assert(Opcodes.data() == Other.Opcodes.data() && "compare iterators of different files");
#endif
return (Ptr == Other.Ptr) &&
(RemainingLoopCount == Other.RemainingLoopCount) &&
(Done == Other.Done);
@ -3073,7 +3077,11 @@ uint32_t MachOBindEntry::flags() const { return Flags; }
int MachOBindEntry::ordinal() const { return Ordinal; }
bool MachOBindEntry::operator==(const MachOBindEntry &Other) const {
#ifdef EXPENSIVE_CHECKS
assert(Opcodes == Other.Opcodes && "compare iterators of different files");
#else
assert(Opcodes.data() == Other.Opcodes.data() && "compare iterators of different files");
#endif
return (Ptr == Other.Ptr) &&
(RemainingLoopCount == Other.RemainingLoopCount) &&
(Done == Other.Done);


@ -22,6 +22,12 @@
using namespace llvm;
using namespace object;
static llvm::cl::opt<bool> IgnoreEmptyThinLTOIndexFile(
"ignore-empty-index-file", llvm::cl::ZeroOrMore,
llvm::cl::desc(
"Ignore an empty index file and perform non-ThinLTO compilation"),
llvm::cl::init(false));
ModuleSummaryIndexObjectFile::ModuleSummaryIndexObjectFile(
MemoryBufferRef Object, std::unique_ptr<ModuleSummaryIndex> I)
: SymbolicFile(Binary::ID_ModuleSummaryIndex, Object), Index(std::move(I)) {
@ -97,6 +103,8 @@ llvm::getModuleSummaryIndexForFile(StringRef Path) {
if (EC)
return errorCodeToError(EC);
MemoryBufferRef BufferRef = (FileOrErr.get())->getMemBufferRef();
if (IgnoreEmptyThinLTOIndexFile && !BufferRef.getBufferSize())
return nullptr;
Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
object::ModuleSummaryIndexObjectFile::create(BufferRef);
if (!ObjOrErr)


@ -373,7 +373,7 @@ void Option::removeArgument() { GlobalParser->removeOption(this); }
void Option::setArgStr(StringRef S) {
if (FullyInitialized)
GlobalParser->updateArgStr(this, S);
assert(S[0] != '-' && "Option can't start with '-");
assert((S.empty() || S[0] != '-') && "Option can't start with '-");
ArgStr = S;
}


@ -571,6 +571,16 @@ void native(SmallVectorImpl<char> &Path) {
#endif
}
std::string convert_to_slash(StringRef path) {
#ifdef LLVM_ON_WIN32
std::string s = path.str();
std::replace(s.begin(), s.end(), '\\', '/');
return s;
#else
return path;
#endif
}
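Expected behavior, for concreteness:

#include "llvm/Support/Path.h"
#include <cassert>
#include <string>

void demo() {
  // On Windows: "dir\file.txt" becomes "dir/file.txt".
  // On Unix this is a no-op, since '\' is an ordinary path character there.
  std::string S = llvm::sys::path::convert_to_slash("dir\\file.txt");
#ifdef LLVM_ON_WIN32
  assert(S == "dir/file.txt");
#endif
}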
StringRef filename(StringRef path) {
return *rbegin(path);
}


@ -26,6 +26,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
using namespace llvm;
@ -109,27 +110,44 @@ static void writePaxHeader(raw_fd_ostream &OS, StringRef Path) {
pad(OS);
}
// In the Ustar header, a path can be split at any '/' to store
// a path into UstarHeader::Name and UstarHeader::Prefix. This
// function splits a given path for that purpose.
static std::pair<StringRef, StringRef> splitPath(StringRef Path) {
if (Path.size() <= sizeof(UstarHeader::Name))
return {"", Path};
size_t Sep = Path.rfind('/', sizeof(UstarHeader::Name) + 1);
if (Sep == StringRef::npos)
return {"", Path};
return {Path.substr(0, Sep), Path.substr(Sep + 1)};
}
// Returns true if a given path can be stored to a Ustar header
// without the PAX extension.
static bool fitsInUstar(StringRef Path) {
StringRef Prefix;
StringRef Name;
std::tie(Prefix, Name) = splitPath(Path);
return Name.size() <= sizeof(UstarHeader::Name);
}
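A standalone restatement of the split rule may help (field sizes from the Ustar format: Name is 100 bytes, Prefix is 155; the helper below is hypothetical, since splitPath itself is file-local):

#include <string>
#include <utility>

// Mirror of splitPath with the 100-byte Name limit written out. E.g. a
// 130-char "very/long/.../archive/member.o" splits into
// Prefix = "very/long/.../archive" and Name = "member.o".
static std::pair<std::string, std::string>
splitForUstar(const std::string &Path) {
  const size_t NameSize = 100; // sizeof(UstarHeader::Name)
  if (Path.size() <= NameSize)
    return {"", Path}; // fits in Name as-is
  size_t Sep = Path.rfind('/', NameSize + 1);
  if (Sep == std::string::npos)
    return {"", Path}; // no usable split; caller emits a PAX header instead
  return {Path.substr(0, Sep), Path.substr(Sep + 1)};
}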
// The PAX header is an extended format, so a PAX header needs
// to be followed by a "real" header.
static void writeUstarHeader(raw_fd_ostream &OS, StringRef Path, size_t Size) {
StringRef Prefix;
StringRef Name;
std::tie(Prefix, Name) = splitPath(Path);
UstarHeader Hdr = {};
memcpy(Hdr.Name, Path.data(), Path.size());
memcpy(Hdr.Name, Name.data(), Name.size());
memcpy(Hdr.Mode, "0000664", 8);
snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", Size);
memcpy(Hdr.Magic, "ustar", 6);
memcpy(Hdr.Prefix, Prefix.data(), Prefix.size());
computeChecksum(Hdr);
OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr));
}
// We want to use '/' as a path separator even on Windows.
// This function canonicalizes a given path.
static std::string canonicalize(std::string S) {
#ifdef LLVM_ON_WIN32
std::replace(S.begin(), S.end(), '\\', '/');
#endif
return S;
}
// Creates a TarWriter instance and returns it.
Expected<std::unique_ptr<TarWriter>> TarWriter::create(StringRef OutputPath,
StringRef BaseDir) {
@ -145,8 +163,8 @@ TarWriter::TarWriter(int FD, StringRef BaseDir)
// Append a given file to an archive.
void TarWriter::append(StringRef Path, StringRef Data) {
// Write Path and Data.
std::string S = BaseDir + "/" + canonicalize(Path) + "\0";
if (S.size() <= sizeof(UstarHeader::Name)) {
std::string S = BaseDir + "/" + sys::path::convert_to_slash(Path) + "\0";
if (fitsInUstar(S)) {
writeUstarHeader(OS, S, Data.size());
} else {
writePaxHeader(OS, S);


@ -608,6 +608,10 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
if ((C = dyn_cast<ConstantSDNode>(Addr))) {
Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
Base = Addr.getOperand(0);


@ -172,16 +172,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STORE, MVT::v2f64, Promote);
AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v4i32);
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
setTruncStoreAction(MVT::i64, MVT::i1, Expand);
setTruncStoreAction(MVT::i64, MVT::i8, Expand);
setTruncStoreAction(MVT::i64, MVT::i16, Expand);


@ -822,6 +822,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
bool isForcedDPP() const { return ForcedDPP; }
bool isForcedSDWA() const { return ForcedSDWA; }
ArrayRef<unsigned> getMatchedVariants() const;
std::unique_ptr<AMDGPUOperand> parseRegister();
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
@ -1630,31 +1631,44 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
return Match_Success;
}
// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
if (getForcedEncodingSize() == 32) {
static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
return makeArrayRef(Variants);
}
if (isForcedVOP3()) {
static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
return makeArrayRef(Variants);
}
if (isForcedSDWA()) {
static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA};
return makeArrayRef(Variants);
}
if (isForcedDPP()) {
static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
return makeArrayRef(Variants);
}
static const unsigned Variants[] = {
AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::DPP
};
return makeArrayRef(Variants);
}
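The rewrite above trades the per-call std::vector for views of function-local static tables. The same pattern in isolation (names illustrative):

#include "llvm/ADT/ArrayRef.h"

// Hand out ArrayRef views of static tables instead of building a std::vector
// on every call. The arrays have static storage duration, so the returned
// view never dangles.
llvm::ArrayRef<unsigned> getVariantsFor(bool ForcedOnly) {
  static const unsigned Forced[] = {1};
  static const unsigned All[] = {0, 1, 2, 3};
  return ForcedOnly ? llvm::makeArrayRef(Forced) : llvm::makeArrayRef(All);
}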
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
// What asm variants we should check
std::vector<unsigned> MatchedVariants;
if (getForcedEncodingSize() == 32) {
MatchedVariants = {AMDGPUAsmVariants::DEFAULT};
} else if (isForcedVOP3()) {
MatchedVariants = {AMDGPUAsmVariants::VOP3};
} else if (isForcedSDWA()) {
MatchedVariants = {AMDGPUAsmVariants::SDWA};
} else if (isForcedDPP()) {
MatchedVariants = {AMDGPUAsmVariants::DPP};
} else {
MatchedVariants = {AMDGPUAsmVariants::DEFAULT,
AMDGPUAsmVariants::VOP3,
AMDGPUAsmVariants::SDWA,
AMDGPUAsmVariants::DPP};
}
MCInst Inst;
unsigned Result = Match_Success;
for (auto Variant : MatchedVariants) {
for (auto Variant : getMatchedVariants()) {
uint64_t EI;
auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
Variant);
@ -3486,7 +3500,7 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
for (unsigned E = Operands.size(); I != E; ++I) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// Add the register arguments
if ((BasicInstType == SIInstrFlags::VOPC ||
if ((BasicInstType == SIInstrFlags::VOPC ||
BasicInstType == SIInstrFlags::VOP2)&&
Op.isReg() &&
Op.Reg.RegNo == AMDGPU::VCC) {

View File

@@ -99,6 +99,18 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::i32, MVT::i8, Custom);
setTruncStoreAction(MVT::i32, MVT::i16, Custom);
// We need to include these since trunc STORES to PRIVATE need
// special handling to accommodate RMW
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);
// Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
@@ -1087,79 +1099,114 @@ void R600TargetLowering::getStackAddress(unsigned StackWidth,
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
SelectionDAG &DAG) const {
SDLoc DL(Store);
//TODO: Who creates the i8 stores?
assert(Store->isTruncatingStore()
|| Store->getValue().getValueType() == MVT::i8);
assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
unsigned Mask = 0;
SDValue Mask;
if (Store->getMemoryVT() == MVT::i8) {
Mask = 0xff;
assert(Store->getAlignment() >= 1);
Mask = DAG.getConstant(0xff, DL, MVT::i32);
} else if (Store->getMemoryVT() == MVT::i16) {
Mask = 0xffff;
assert(Store->getAlignment() >= 2);
Mask = DAG.getConstant(0xffff, DL, MVT::i32);
} else {
llvm_unreachable("Unsupported private trunc store");
}
SDValue Chain = Store->getChain();
SDValue BasePtr = Store->getBasePtr();
SDValue Offset = Store->getOffset();
EVT MemVT = Store->getMemoryVT();
SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
DAG.getConstant(2, DL, MVT::i32));
SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
Chain, Ptr,
DAG.getTargetConstant(0, DL, MVT::i32));
SDValue LoadPtr = BasePtr;
if (!Offset.isUndef()) {
LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
}
SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
// Get dword location
// TODO: this should be eliminated by the future SHR ptr, 2
SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
DAG.getConstant(0xfffffffc, DL, MVT::i32));
// Load dword
// TODO: can we be smarter about machine pointer info?
SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo());
Chain = Dst.getValue(1);
// Get offset in dword
SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
DAG.getConstant(0x3, DL, MVT::i32));
// Convert byte offset to bit shift
SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
DAG.getConstant(3, DL, MVT::i32));
// TODO: Contrary to the name of the function,
// it also handles sub-i32 non-truncating stores (like i1).
SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
Store->getValue());
// Mask the value to the right type
SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
// Shift the value in place
SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
MaskedValue, ShiftAmt);
SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
DAG.getConstant(Mask, DL, MVT::i32),
ShiftAmt);
DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
DAG.getConstant(0xffffffff, DL, MVT::i32));
// Shift the mask in place
SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
// Invert the mask. NOTE: if we had native ROL instructions we could
// use inverted mask
DstMask = DAG.getNOT(DL, DstMask, MVT::i32);
// Cleanup the target bits
Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
// Add the new bits
SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
Chain, Value, Ptr,
DAG.getTargetConstant(0, DL, MVT::i32));
// Store dword
// TODO: Can we be smarter about MachinePointerInfo?
return DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
}
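// A scalar reference model of the read-modify-write sequence built above
// (illustrative sketch only, not part of this patch; `Mem` stands for the
// private buffer viewed as an array of dwords):
//
//   uint32_t rmwTruncStore(uint32_t *Mem, uint32_t Addr,
//                          uint32_t Val, uint32_t Mask /* 0xff or 0xffff */) {
//     uint32_t Dword = Mem[Addr >> 2];      // load the containing dword
//     uint32_t Shift = (Addr & 3) * 8;      // bit offset of the target byte
//     Dword &= ~(Mask << Shift);            // clear the target bits
//     Dword |= (Val & Mask) << Shift;       // merge the shifted value
//     return Mem[Addr >> 2] = Dword;        // store the dword back
//   }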
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
unsigned AS = StoreNode->getAddressSpace();
SDValue Value = StoreNode->getValue();
EVT ValueVT = Value.getValueType();
EVT MemVT = StoreNode->getMemoryVT();
unsigned Align = StoreNode->getAlignment();
SDValue Chain = StoreNode->getChain();
SDValue Ptr = StoreNode->getBasePtr();
SDValue Value = StoreNode->getValue();
EVT VT = Value.getValueType();
EVT MemVT = StoreNode->getMemoryVT();
EVT PtrVT = Ptr.getValueType();
SDLoc DL(Op);
// Neither LOCAL nor PRIVATE can do vectors at the moment
if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
ValueVT.isVector()) {
return SplitVectorStore(Op, DAG);
VT.isVector()) {
return scalarizeVectorStore(StoreNode, DAG);
}
// Private AS needs special fixes
if (Align < MemVT.getStoreSize() && (AS != AMDGPUAS::PRIVATE_ADDRESS) &&
unsigned Align = StoreNode->getAlignment();
if (Align < MemVT.getStoreSize() &&
!allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
return expandUnalignedStore(StoreNode, DAG);
}
SDLoc DL(Op);
SDValue Chain = StoreNode->getChain();
SDValue Ptr = StoreNode->getBasePtr();
SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
DAG.getConstant(2, DL, PtrVT));
if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
// It is beneficial to create MSKOR here instead of combiner to avoid
// artificial dependencies introduced by RMW
if (StoreNode->isTruncatingStore()) {
EVT VT = Value.getValueType();
assert(VT.bitsLE(MVT::i32));
SDValue MaskConstant;
if (MemVT == MVT::i8) {
@@ -1169,15 +1216,19 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
assert(StoreNode->getAlignment() >= 2);
MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
}
SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
DAG.getConstant(2, DL, MVT::i32));
SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
DAG.getConstant(0x00000003, DL, VT));
SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
DAG.getConstant(0x00000003, DL, PtrVT));
SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
DAG.getConstant(3, DL, VT));
// Put the mask in correct place
SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
// Put the mask in correct place
SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
DAG.getConstant(3, DL, VT));
SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
// XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
// vector instead.
SDValue Src[4] = {
@@ -1191,12 +1242,9 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
Op->getVTList(), Args, MemVT,
StoreNode->getMemOperand());
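// Illustrative MSKOR semantics (a sketch, inferred from the operands built
// above rather than stated by this patch): the target performs the
// read-modify-write in the memory system, roughly
//   *Ptr = (*Ptr & ~Mask) | ShiftedValue;  // mask and value pre-shifted
// which is why no explicit load/modify/store chain is created here.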
} else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
ValueVT.bitsGE(MVT::i32)) {
} else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
// Convert pointer from byte address to dword address.
Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
Ptr, DAG.getConstant(2, DL, MVT::i32)));
Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
llvm_unreachable("Truncated and indexed stores not supported yet");
@@ -1207,49 +1255,22 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
}
}
// GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
if (AS != AMDGPUAS::PRIVATE_ADDRESS)
return SDValue();
if (MemVT.bitsLT(MVT::i32))
return lowerPrivateTruncStore(StoreNode, DAG);
// Lowering for indirect addressing
const MachineFunction &MF = DAG.getMachineFunction();
const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
unsigned StackWidth = TFL->getStackWidth(MF);
Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
if (ValueVT.isVector()) {
unsigned NumElemVT = ValueVT.getVectorNumElements();
EVT ElemVT = ValueVT.getVectorElementType();
SmallVector<SDValue, 4> Stores(NumElemVT);
assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
"vector width in load");
for (unsigned i = 0; i < NumElemVT; ++i) {
unsigned Channel, PtrIncr;
getStackAddress(StackWidth, i, Channel, PtrIncr);
Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
DAG.getConstant(PtrIncr, DL, MVT::i32));
SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
Value, DAG.getConstant(i, DL, MVT::i32));
Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
Chain, Elem, Ptr,
DAG.getTargetConstant(Channel, DL, MVT::i32));
}
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
} else {
if (ValueVT == MVT::i8) {
Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
}
Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
// Standard i32+ store, tag it with DWORDADDR to note that the address
// has been shifted
if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
}
return Chain;
// Tagged i32+ stores will be matched by patterns
return SDValue();
}
// return (512 + (kc_bank << 12)
@@ -1299,51 +1320,50 @@ SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
LoadSDNode *Load = cast<LoadSDNode>(Op);
ISD::LoadExtType ExtType = Load->getExtensionType();
EVT MemVT = Load->getMemoryVT();
assert(Load->getAlignment() >= MemVT.getStoreSize());
// <SI && AS=PRIVATE && EXTLOAD && size < 32bit,
// register (2-)byte extract.
SDValue BasePtr = Load->getBasePtr();
SDValue Chain = Load->getChain();
SDValue Offset = Load->getOffset();
// Get Register holding the target.
SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
DAG.getConstant(2, DL, MVT::i32));
// Load the Register.
SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
Load->getChain(),
Ptr,
DAG.getTargetConstant(0, DL, MVT::i32),
Op.getOperand(2));
SDValue LoadPtr = BasePtr;
if (!Offset.isUndef()) {
LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
}
// Get dword location
// NOTE: this should be eliminated by the future SHR ptr, 2
SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
DAG.getConstant(0xfffffffc, DL, MVT::i32));
// Load dword
// TODO: can we be smarter about machine pointer info?
SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo());
// Get offset within the register.
SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
Load->getBasePtr(),
DAG.getConstant(0x3, DL, MVT::i32));
LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));
// Bit offset of target byte (byteIdx * 8).
SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
DAG.getConstant(3, DL, MVT::i32));
// Shift to the right.
Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);
// Eliminate the upper bits by setting them to ...
EVT MemEltVT = MemVT.getScalarType();
// ... ones.
if (ExtType == ISD::SEXTLOAD) {
if (ExtType == ISD::SEXTLOAD) { // ... ones.
SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
SDValue Ops[] = {
DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
Load->getChain()
};
return DAG.getMergeValues(Ops, DL);
Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
} else { // ... or zeros.
Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
}
// ... or zeros.
SDValue Ops[] = {
DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
Load->getChain()
Ret,
Read.getValue(1) // This should be our output chain
};
return DAG.getMergeValues(Ops, DL);
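// Scalar reference model of the extending-load sequence above (illustrative
// sketch only; `Mem` stands for the private buffer viewed as dwords):
//
//   int32_t extLoadSubDword(const uint32_t *Mem, uint32_t Addr,
//                           unsigned Bits, bool IsSigned) {
//     uint32_t Dword = Mem[Addr >> 2];          // load the containing dword
//     uint32_t Shift = (Addr & 3) * 8;          // bit offset of target byte
//     uint32_t V = Dword >> Shift;              // move the field to bit 0
//     if (IsSigned)                             // SIGN_EXTEND_INREG
//       return (int32_t)(V << (32 - Bits)) >> (32 - Bits);
//     return (int32_t)(V & ((1u << Bits) - 1)); // zero-extend in register
//   }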
@@ -1365,12 +1385,10 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = LoadNode->getChain();
SDValue Ptr = LoadNode->getBasePtr();
if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
SDValue MergedValues[2] = {
scalarizeVectorLoad(LoadNode, DAG),
Chain
};
return DAG.getMergeValues(MergedValues, DL);
if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
VT.isVector()) {
return scalarizeVectorLoad(LoadNode, DAG);
}
int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
@@ -1421,8 +1439,6 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(MergedValues, DL);
}
SDValue LoweredLoad;
// For most operations returning SDValue() will result in the node being
// expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
// need to manually expand loads that may be legal in some address spaces and
@@ -1447,47 +1463,14 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
// Lowering for indirect addressing
const MachineFunction &MF = DAG.getMachineFunction();
const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
unsigned StackWidth = TFL->getStackWidth(MF);
Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
if (VT.isVector()) {
unsigned NumElemVT = VT.getVectorNumElements();
EVT ElemVT = VT.getVectorElementType();
SDValue Loads[4];
assert(NumElemVT <= 4);
assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
"vector width in load");
for (unsigned i = 0; i < NumElemVT; ++i) {
unsigned Channel, PtrIncr;
getStackAddress(StackWidth, i, Channel, PtrIncr);
Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
DAG.getConstant(PtrIncr, DL, MVT::i32));
Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
Chain, Ptr,
DAG.getTargetConstant(Channel, DL, MVT::i32),
Op.getOperand(2));
}
EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElemVT);
LoweredLoad = DAG.getBuildVector(TargetVT, DL, makeArrayRef(Loads, NumElemVT));
} else {
LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
Chain, Ptr,
DAG.getTargetConstant(0, DL, MVT::i32), // Channel
Op.getOperand(2));
// DWORDADDR ISD marks already shifted address
if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
assert(VT == MVT::i32);
Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
}
SDValue Ops[2] = {
LoweredLoad,
Chain
};
return DAG.getMergeValues(Ops, DL);
return SDValue();
}
SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {

View File

@@ -1268,6 +1268,17 @@ let Predicates = [isR600] in {
defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
// Hardcode channel to 0
// NOTE: LSHR is not available here. LSHR is a per-family instruction.
def : Pat <
(i32 (load_private ADDRIndirect:$addr) ),
(R600_RegisterLoad FRAMEri:$addr, (i32 0))
>;
def : Pat <
(store_private i32:$val, ADDRIndirect:$addr),
(R600_RegisterStore i32:$val, FRAMEri:$addr, (i32 0))
>;
//===----------------------------------------------------------------------===//
// Pseudo instructions

View File

@@ -99,6 +99,18 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STORE, MVT::v16i32, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
setTruncStoreAction(MVT::v32i32, MVT::v32i16, Expand);
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Expand);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i8, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
setTruncStoreAction(MVT::v32i32, MVT::v32i8, Expand);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::ConstantPool, MVT::v2i64, Expand);
@@ -699,7 +711,8 @@ SDValue SITargetLowering::LowerParameterPtr(SelectionDAG &DAG,
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Chain,
unsigned Offset, bool Signed) const {
unsigned Offset, bool Signed,
const ISD::InputArg *Arg) const {
const DataLayout &DL = DAG.getDataLayout();
Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
@@ -713,20 +726,21 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
SDValue Val;
SDValue Val = Load;
if (Arg && (Arg->Flags.isSExt() || Arg->Flags.isZExt()) &&
VT.bitsLT(MemVT)) {
unsigned Opc = Arg->Flags.isZExt() ? ISD::AssertZext : ISD::AssertSext;
Val = DAG.getNode(Opc, SL, MemVT, Val, DAG.getValueType(VT));
}
if (MemVT.isFloatingPoint())
Val = getFPExtOrFPTrunc(DAG, Load, SL, VT);
Val = getFPExtOrFPTrunc(DAG, Val, SL, VT);
else if (Signed)
Val = DAG.getSExtOrTrunc(Load, SL, VT);
Val = DAG.getSExtOrTrunc(Val, SL, VT);
else
Val = DAG.getZExtOrTrunc(Load, SL, VT);
Val = DAG.getZExtOrTrunc(Val, SL, VT);
SDValue Ops[] = {
Val,
Load.getValue(1)
};
return DAG.getMergeValues(Ops, SL);
return DAG.getMergeValues({ Val, Load.getValue(1) }, SL);
}
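// Illustrative effect of the AssertZext/AssertSext insertion above (a
// sketch, not taken from the patch): for a `zeroext i8` kernel argument
// loaded with MemVT = i32,
//   Load                   ; i32, only bits 7:0 are meaningful
//   AssertZext Load, i8    ; records that bits 31:8 are already zero
//   getZExtOrTrunc(...)    ; the final extension/truncation now folds away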
SDValue SITargetLowering::LowerFormalArguments(
@@ -899,7 +913,8 @@ SDValue SITargetLowering::LowerFormalArguments(
// The first 36 bytes of the input buffer contains information about
// thread group and global sizes.
SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, Chain,
Offset, Ins[i].Flags.isSExt());
Offset, Ins[i].Flags.isSExt(),
&Ins[i]);
Chains.push_back(Arg.getValue(1));
auto *ParamTy =

View File

@@ -24,7 +24,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue LowerParameterPtr(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain,
unsigned Offset) const;
SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL,
SDValue Chain, unsigned Offset, bool Signed) const;
SDValue Chain, unsigned Offset, bool Signed,
const ISD::InputArg *Arg = nullptr) const;
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const override;
SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,

View File

@@ -203,8 +203,8 @@ unsigned AVRDAGToDAGISel::selectIndexedProgMemLoad(const LoadSDNode *LD,
bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
unsigned ConstraintCode,
std::vector<SDValue> &OutOps) {
assert(ConstraintCode == InlineAsm::Constraint_m ||
ConstraintCode == InlineAsm::Constraint_Q &&
assert((ConstraintCode == InlineAsm::Constraint_m ||
ConstraintCode == InlineAsm::Constraint_Q) &&
"Unexpected asm memory constraint");
MachineRegisterInfo &RI = MF->getRegInfo();

View File

@@ -14,6 +14,7 @@
#include "AVRISelLowering.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -1933,5 +1934,45 @@ void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
unsigned AVRTargetLowering::getRegisterByName(const char *RegName,
EVT VT,
SelectionDAG &DAG) const {
unsigned Reg;
if (VT == MVT::i8) {
Reg = StringSwitch<unsigned>(RegName)
.Case("r0", AVR::R0).Case("r1", AVR::R1).Case("r2", AVR::R2)
.Case("r3", AVR::R3).Case("r4", AVR::R4).Case("r5", AVR::R5)
.Case("r6", AVR::R6).Case("r7", AVR::R7).Case("r8", AVR::R8)
.Case("r9", AVR::R9).Case("r10", AVR::R10).Case("r11", AVR::R11)
.Case("r12", AVR::R12).Case("r13", AVR::R13).Case("r14", AVR::R14)
.Case("r15", AVR::R15).Case("r16", AVR::R16).Case("r17", AVR::R17)
.Case("r18", AVR::R18).Case("r19", AVR::R19).Case("r20", AVR::R20)
.Case("r21", AVR::R21).Case("r22", AVR::R22).Case("r23", AVR::R23)
.Case("r24", AVR::R24).Case("r25", AVR::R25).Case("r26", AVR::R26)
.Case("r27", AVR::R27).Case("r28", AVR::R28).Case("r29", AVR::R29)
.Case("r30", AVR::R30).Case("r31", AVR::R31)
.Case("X", AVR::R27R26).Case("Y", AVR::R29R28).Case("Z", AVR::R31R30)
.Default(0);
} else {
Reg = StringSwitch<unsigned>(RegName)
.Case("r0", AVR::R1R0).Case("r2", AVR::R3R2)
.Case("r4", AVR::R5R4).Case("r6", AVR::R7R6)
.Case("r8", AVR::R9R8).Case("r10", AVR::R11R10)
.Case("r12", AVR::R13R12).Case("r14", AVR::R15R14)
.Case("r16", AVR::R17R16).Case("r18", AVR::R19R18)
.Case("r20", AVR::R21R20).Case("r22", AVR::R23R22)
.Case("r24", AVR::R25R24).Case("r26", AVR::R27R26)
.Case("r28", AVR::R29R28).Case("r30", AVR::R31R30)
.Case("X", AVR::R27R26).Case("Y", AVR::R29R28).Case("Z", AVR::R31R30)
.Default(0);
}
if (Reg)
return Reg;
report_fatal_error("Invalid register name for global variable");
}
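// Illustrative use from C source (a sketch; assumes GCC-style global named
// register variables, which lower to llvm.read_register/llvm.write_register
// and are resolved through getRegisterByName):
//   register unsigned char Cnt asm("r3");   // i8  lookup: "r3"  -> AVR::R3
//   register unsigned int  Idx asm("r24");  // i16 lookup: "r24" -> AVR::R25R24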
} // end of namespace llvm

View File

@@ -116,6 +116,9 @@ class AVRTargetLowering : public TargetLowering {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
unsigned getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const override;
private:
SDValue getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AVRcc,
SelectionDAG &DAG, SDLoc dl) const;

View File

@@ -13,15 +13,13 @@
#include "BPF.h"
#include "BPFInstrInfo.h"
#include "BPFSubtarget.h"
#include "BPFTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <iterator>
#define GET_INSTRINFO_CTOR_DTOR
#include "BPFGenInstrInfo.inc"
@@ -109,11 +107,11 @@ bool BPFInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
while (std::next(I) != MBB.end())
std::next(I)->eraseFromParent();
Cond.clear();
FBB = 0;
FBB = nullptr;
// Delete the J if it's equivalent to a fall-through.
if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
TBB = 0;
TBB = nullptr;
I->eraseFromParent();
I = MBB.end();
continue;

View File

@@ -12,16 +12,15 @@
//===----------------------------------------------------------------------===//
#include "BPF.h"
#include "BPFRegisterInfo.h"
#include "BPFSubtarget.h"
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include <cstdint>
using namespace llvm;
@@ -36,14 +35,15 @@ class BPFDisassembler : public MCDisassembler {
public:
BPFDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
virtual ~BPFDisassembler() {}
~BPFDisassembler() override = default;
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &VStream,
raw_ostream &CStream) const override;
};
}
} // end anonymous namespace
static MCDisassembler *createBPFDisassembler(const Target &T,
const MCSubtargetInfo &STI,

View File

@@ -8,28 +8,24 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
using namespace llvm;
namespace {
class BPFAsmBackend : public MCAsmBackend {
public:
bool IsLittleEndian;
BPFAsmBackend(bool IsLittleEndian)
: MCAsmBackend(), IsLittleEndian(IsLittleEndian) {}
~BPFAsmBackend() override {}
~BPFAsmBackend() override = default;
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value, bool IsPCRel) const override;
@@ -53,6 +49,8 @@ class BPFAsmBackend : public MCAsmBackend {
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
};
} // end anonymous namespace
bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
if ((Count % 8) != 0)
return false;
@@ -66,7 +64,6 @@ bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
bool IsPCRel) const {
if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) {
assert(Value == 0);
} else if (Fixup.getKind() == FK_Data_4 || Fixup.getKind() == FK_Data_8) {
@@ -92,7 +89,6 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
return createBPFELFObjectWriter(OS, 0, IsLittleEndian);
}
}
MCAsmBackend *llvm::createBPFAsmBackend(const Target &T,
const MCRegisterInfo &MRI,

View File

@@ -10,29 +10,30 @@
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstdint>
using namespace llvm;
namespace {
class BPFELFObjectWriter : public MCELFObjectTargetWriter {
public:
BPFELFObjectWriter(uint8_t OSABI);
~BPFELFObjectWriter() override;
~BPFELFObjectWriter() override = default;
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
};
}
} // end anonymous namespace
BPFELFObjectWriter::BPFELFObjectWriter(uint8_t OSABI)
: MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_BPF,
/*HasRelocationAddend*/ false) {}
BPFELFObjectWriter::~BPFELFObjectWriter() {}
unsigned BPFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {

View File

@@ -12,24 +12,25 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
using namespace llvm;
#define DEBUG_TYPE "mccodeemitter"
namespace {
class BPFMCCodeEmitter : public MCCodeEmitter {
BPFMCCodeEmitter(const BPFMCCodeEmitter &) = delete;
void operator=(const BPFMCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
const MCRegisterInfo &MRI;
bool IsLittleEndian;
@@ -38,8 +39,9 @@ class BPFMCCodeEmitter : public MCCodeEmitter {
BPFMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
bool IsLittleEndian)
: MCII(mcii), MRI(mri), IsLittleEndian(IsLittleEndian) {}
~BPFMCCodeEmitter() {}
BPFMCCodeEmitter(const BPFMCCodeEmitter &) = delete;
void operator=(const BPFMCCodeEmitter &) = delete;
~BPFMCCodeEmitter() override = default;
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
@@ -66,7 +68,8 @@ class BPFMCCodeEmitter : public MCCodeEmitter {
void verifyInstructionPredicates(const MCInst &MI,
uint64_t AvailableFeatures) const;
};
}
} // end anonymous namespace
MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,

View File

@@ -12,14 +12,13 @@
//===----------------------------------------------------------------------===//
#include "BPF.h"
#include "BPFMCTargetDesc.h"
#include "BPFMCAsmInfo.h"
#include "InstPrinter/BPFInstPrinter.h"
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "MCTargetDesc/BPFMCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
@@ -64,7 +63,7 @@ static MCInstPrinter *createBPFMCInstPrinter(const Triple &T,
const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
return new BPFInstPrinter(MAI, MII, MRI);
return 0;
return nullptr;
}
extern "C" void LLVMInitializeBPFTargetMC() {

View File

@@ -101,7 +101,7 @@ LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T) {
}
LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T,
const char* Triple, const char* CPU, const char* Features,
const char *Triple, const char *CPU, const char *Features,
LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc,
LLVMCodeModel CodeModel) {
Optional<Reloc::Model> RM;
@@ -139,7 +139,7 @@ LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T,
TargetOptions opt;
return wrap(unwrap(T)->createTargetMachine(Triple, CPU, Features, opt, RM,
CM, OL));
CM, OL));
}
void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) { delete unwrap(T); }
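// Illustrative client-side use of the C API above (a sketch; assumes the
// relevant targets were already initialized, e.g. via LLVMInitializeAllTargets
// and LLVMInitializeAllTargetMCs):
//   LLVMTargetRef T;
//   char *Err = NULL;
//   if (!LLVMGetTargetFromTriple("x86_64-unknown-linux-gnu", &T, &Err)) {
//     LLVMTargetMachineRef TM = LLVMCreateTargetMachine(
//         T, "x86_64-unknown-linux-gnu", "generic", "",
//         LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault);
//     /* ... use TM ... */
//     LLVMDisposeTargetMachine(TM);
//   } else {
//     LLVMDisposeMessage(Err);
//   }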

View File

@@ -28,6 +28,7 @@ class FunctionPass;
// LLVM IR passes.
ModulePass *createWebAssemblyLowerEmscriptenEHSjLj(bool DoEH, bool DoSjLj);
void initializeWebAssemblyLowerEmscriptenEHSjLjPass(PassRegistry &);
ModulePass *createWebAssemblyFixFunctionBitcasts();
FunctionPass *createWebAssemblyOptimizeReturned();
// ISel and immediate followup passes.

View File

@@ -0,0 +1,159 @@
//===-- WebAssemblyFixFunctionBitcasts.cpp - Fix function bitcasts --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Fix bitcasted functions.
///
/// WebAssembly requires caller and callee signatures to match, however in LLVM,
/// some amount of slop is vaguely permitted. Detect mismatch by looking for
/// bitcasts of functions and rewrite them to use wrapper functions instead.
///
/// This doesn't catch all cases, such as when a function's address is taken in
/// one place and casted in another, but it works for many common cases.
///
/// Note that LLVM already optimizes away function bitcasts in common cases by
/// dropping arguments as needed, so this pass only ends up getting used in less
/// common cases.
///
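/// An illustrative sketch of the transformation (not taken from this patch's
/// tests): given
///   define void @foo(i32)
///   call void bitcast (void (i32)* @foo to void ()*)()
/// the pass emits a private wrapper that supplies the missing argument,
///   define private void @bitcast() {
///     call void @foo(i32 undef)
///     ret void
///   }
/// and rewrites the use of the bitcast to call the wrapper instead.
///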
//===----------------------------------------------------------------------===//
#include "WebAssembly.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-fix-function-bitcasts"
namespace {
class FixFunctionBitcasts final : public ModulePass {
StringRef getPassName() const override {
return "WebAssembly Fix Function Bitcasts";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
ModulePass::getAnalysisUsage(AU);
}
bool runOnModule(Module &M) override;
public:
static char ID;
FixFunctionBitcasts() : ModulePass(ID) {}
};
} // End anonymous namespace
char FixFunctionBitcasts::ID = 0;
ModulePass *llvm::createWebAssemblyFixFunctionBitcasts() {
return new FixFunctionBitcasts();
}
// Recursively descend the def-use lists from V to find non-bitcast users of
// bitcasts of V.
static void FindUses(Value *V, Function &F,
SmallVectorImpl<std::pair<Use *, Function *>> &Uses) {
for (Use &U : V->uses()) {
if (BitCastOperator *BC = dyn_cast<BitCastOperator>(U.getUser()))
FindUses(BC, F, Uses);
else if (U.get()->getType() != F.getType())
Uses.push_back(std::make_pair(&U, &F));
}
}
// Create a wrapper function with type Ty that calls F (which may have a
// different type). Attempt to support common bitcasted function idioms:
// - Call with more arguments than needed: arguments are dropped
// - Call with fewer arguments than needed: arguments are filled in with undef
// - Return value is not needed: drop it
// - Return value needed but not present: supply an undef
//
// For now, return nullptr without creating a wrapper if the wrapper cannot
// be generated due to incompatible types.
static Function *CreateWrapper(Function *F, FunctionType *Ty) {
Module *M = F->getParent();
Function *Wrapper =
Function::Create(Ty, Function::PrivateLinkage, "bitcast", M);
BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper);
// Determine what arguments to pass.
SmallVector<Value *, 4> Args;
Function::arg_iterator AI = Wrapper->arg_begin();
FunctionType::param_iterator PI = F->getFunctionType()->param_begin();
FunctionType::param_iterator PE = F->getFunctionType()->param_end();
for (; AI != Wrapper->arg_end() && PI != PE; ++AI, ++PI) {
if (AI->getType() != *PI) {
Wrapper->eraseFromParent();
return nullptr;
}
Args.push_back(&*AI);
}
for (; PI != PE; ++PI)
Args.push_back(UndefValue::get(*PI));
CallInst *Call = CallInst::Create(F, Args, "", BB);
// Determine what value to return.
if (Ty->getReturnType()->isVoidTy())
ReturnInst::Create(M->getContext(), BB);
else if (F->getFunctionType()->getReturnType()->isVoidTy())
ReturnInst::Create(M->getContext(), UndefValue::get(Ty->getReturnType()),
BB);
else if (F->getFunctionType()->getReturnType() == Ty->getReturnType())
ReturnInst::Create(M->getContext(), Call, BB);
else {
Wrapper->eraseFromParent();
return nullptr;
}
return Wrapper;
}
bool FixFunctionBitcasts::runOnModule(Module &M) {
SmallVector<std::pair<Use *, Function *>, 0> Uses;
// Collect all the places that need wrappers.
for (Function &F : M)
FindUses(&F, F, Uses);
DenseMap<std::pair<Function *, FunctionType *>, Function *> Wrappers;
for (auto &UseFunc : Uses) {
Use *U = UseFunc.first;
Function *F = UseFunc.second;
PointerType *PTy = cast<PointerType>(U->get()->getType());
FunctionType *Ty = dyn_cast<FunctionType>(PTy->getElementType());
// If the function is casted to something like i8* as a "generic pointer"
// to be later casted to something else, we can't generate a wrapper for it.
// Just ignore such casts for now.
if (!Ty)
continue;
auto Pair = Wrappers.insert(std::make_pair(std::make_pair(F, Ty), nullptr));
if (Pair.second)
Pair.first->second = CreateWrapper(F, Ty);
Function *Wrapper = Pair.first->second;
if (!Wrapper)
continue;
if (isa<Constant>(U->get()))
U->get()->replaceAllUsesWith(Wrapper);
else
U->set(Wrapper);
}
return true;
}

View File

@@ -40,8 +40,8 @@ defm ROTL : BinaryInt<rotl, "rotl", 0x77, 0x89>;
defm ROTR : BinaryInt<rotr, "rotr", 0x78, 0x8a>;
let isCommutable = 1 in {
defm EQ : ComparisonInt<SETEQ, "eq ", 0x46, 0x68>;
defm NE : ComparisonInt<SETNE, "ne ", 0x47, 0x69>;
defm EQ : ComparisonInt<SETEQ, "eq ", 0x46, 0x51>;
defm NE : ComparisonInt<SETNE, "ne ", 0x47, 0x52>;
} // isCommutable = 1
defm LT_S : ComparisonInt<SETLT, "lt_s", 0x48, 0x53>;
defm LT_U : ComparisonInt<SETULT, "lt_u", 0x49, 0x54>;

View File

@@ -163,6 +163,10 @@ void WebAssemblyPassConfig::addIRPasses() {
// control specifically what gets lowered.
addPass(createAtomicExpandPass(TM));
// Fix function bitcasts, as WebAssembly requires caller and callee signatures
// to match.
addPass(createWebAssemblyFixFunctionBitcasts());
// Optimize "returned" function attributes.
if (getOptLevel() != CodeGenOpt::None)
addPass(createWebAssemblyOptimizeReturned());

View File

@@ -6962,23 +6962,24 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
}
/// Try to fold a build_vector that performs an 'addsub' to an X86ISD::ADDSUB
/// node.
static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
/// Returns true iff \p BV builds a vector with the result equivalent to
/// the result of ADDSUB operation.
/// If true is returned then the operands of ADDSUB = Opnd0 +- Opnd1 operation
/// are written to the parameters \p Opnd0 and \p Opnd1.
static bool isAddSub(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
SDValue &Opnd0, SDValue &Opnd1) {
MVT VT = BV->getSimpleValueType(0);
if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
(!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
return SDValue();
(!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&
(!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
return false;
SDLoc DL(BV);
unsigned NumElts = VT.getVectorNumElements();
SDValue InVec0 = DAG.getUNDEF(VT);
SDValue InVec1 = DAG.getUNDEF(VT);
assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
VT == MVT::v2f64) && "build_vector with an invalid type found!");
// Odd-numbered elements in the input build vector are obtained from
// adding two integer/float elements.
// Even-numbered elements in the input build vector are obtained from
@@ -7000,7 +7001,7 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
// Early exit if we found an unexpected opcode.
if (Opcode != ExpectedOpcode)
return SDValue();
return false;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
@@ -7013,11 +7014,11 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
!isa<ConstantSDNode>(Op0.getOperand(1)) ||
!isa<ConstantSDNode>(Op1.getOperand(1)) ||
Op0.getOperand(1) != Op1.getOperand(1))
return SDValue();
return false;
unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
if (I0 != i)
return SDValue();
return false;
// We found a valid add/sub node. Update the information accordingly.
if (i & 1)
@@ -7029,39 +7030,118 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
if (InVec0.isUndef()) {
InVec0 = Op0.getOperand(0);
if (InVec0.getSimpleValueType() != VT)
return SDValue();
return false;
}
if (InVec1.isUndef()) {
InVec1 = Op1.getOperand(0);
if (InVec1.getSimpleValueType() != VT)
return SDValue();
return false;
}
// Make sure that operands in input to each add/sub node always
// come from a same pair of vectors.
if (InVec0 != Op0.getOperand(0)) {
if (ExpectedOpcode == ISD::FSUB)
return SDValue();
return false;
// FADD is commutable. Try to commute the operands
// and then test again.
std::swap(Op0, Op1);
if (InVec0 != Op0.getOperand(0))
return SDValue();
return false;
}
if (InVec1 != Op1.getOperand(0))
return SDValue();
return false;
// Update the pair of expected opcodes.
std::swap(ExpectedOpcode, NextExpectedOpcode);
}
// Don't try to fold this build_vector into an ADDSUB if the inputs are undef.
if (AddFound && SubFound && !InVec0.isUndef() && !InVec1.isUndef())
return DAG.getNode(X86ISD::ADDSUB, DL, VT, InVec0, InVec1);
if (!AddFound || !SubFound || InVec0.isUndef() || InVec1.isUndef())
return false;
return SDValue();
Opnd0 = InVec0;
Opnd1 = InVec1;
return true;
}
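// Illustrative shape matched by isAddSub above (a sketch, v4f32 case):
//   build_vector (fsub A0,B0), (fadd A1,B1), (fsub A2,B2), (fadd A3,B3)
// yields Opnd0 = A and Opnd1 = B, i.e. the ADDSUBPS lane pattern: subtract
// in the even lanes, add in the odd lanes.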
/// Returns true if it is possible to fold MUL and an idiom that has already been
/// recognized as ADDSUB(\p Opnd0, \p Opnd1) into FMADDSUB(x, y, \p Opnd1).
/// If (and only if) true is returned, the operands of FMADDSUB are written to
/// parameters \p Opnd0, \p Opnd1, \p Opnd2.
///
/// Prior to calling this function it should be known that there is some
/// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation
/// using \p Opnd0 and \p Opnd1 as operands. Also, this method is called
/// before replacement of such SDNode with ADDSUB operation. Thus the number
/// of \p Opnd0 uses is expected to be equal to 2.
/// For example, this function may be called for the following IR:
/// %AB = fmul fast <2 x double> %A, %B
/// %Sub = fsub fast <2 x double> %AB, %C
/// %Add = fadd fast <2 x double> %AB, %C
/// %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add,
/// <2 x i32> <i32 0, i32 3>
/// There is a def for %Addsub here, which potentially can be replaced by
/// X86ISD::ADDSUB operation:
/// %Addsub = X86ISD::ADDSUB %AB, %C
/// and such ADDSUB can further be replaced with FMADDSUB:
/// %Addsub = FMADDSUB %A, %B, %C.
///
/// The main reason why this method is called before the replacement of the
/// recognized ADDSUB idiom with ADDSUB operation is that such replacement
/// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
/// FMADDSUB is.
static bool isFMAddSub(const X86Subtarget &Subtarget, SelectionDAG &DAG,
SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2) {
if (Opnd0.getOpcode() != ISD::FMUL || Opnd0->use_size() != 2 ||
!Subtarget.hasAnyFMA())
return false;
// FIXME: These checks must match the similar ones in
// DAGCombiner::visitFADDForFMACombine. It would be good to have one
// function that would answer if it is Ok to fuse MUL + ADD to FMADD
// or MUL + ADDSUB to FMADDSUB.
const TargetOptions &Options = DAG.getTarget().Options;
bool AllowFusion =
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
if (!AllowFusion)
return false;
Opnd2 = Opnd1;
Opnd1 = Opnd0.getOperand(1);
Opnd0 = Opnd0.getOperand(0);
return true;
}
/// Try to fold a build_vector that performs an 'addsub' or 'fmaddsub'
/// operation into an X86ISD::ADDSUB or X86ISD::FMADDSUB node.
static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Opnd0, Opnd1;
if (!isAddSub(BV, Subtarget, DAG, Opnd0, Opnd1))
return SDValue();
MVT VT = BV->getSimpleValueType(0);
SDLoc DL(BV);
// Try to generate X86ISD::FMADDSUB node here.
SDValue Opnd2;
if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2))
return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
// Do not generate X86ISD::ADDSUB node for 512-bit types even though
// the ADDSUB idiom has been successfully recognized. There are no known
// X86 targets with 512-bit ADDSUB instructions!
// 512-bit ADDSUB idiom recognition was needed only as part of FMADDSUB idiom
// recognition.
if (VT.is512BitVector())
return SDValue();
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
}
/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
@@ -7290,7 +7370,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return VectorConstant;
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
if (SDValue AddSub = LowerToAddSub(BV, Subtarget, DAG))
if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG))
return AddSub;
if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
return HorizontalOp;
@@ -12965,6 +13045,12 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (Subtarget.hasVBMI())
return lowerVectorShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG);
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
return V;
// FIXME: Implement direct support for this type!
return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
}
@@ -16985,9 +17071,16 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst);
}
if (Cond.getOpcode() == ISD::SETCC)
if (SDValue NewCond = LowerSETCC(Cond, DAG))
if (Cond.getOpcode() == ISD::SETCC) {
if (SDValue NewCond = LowerSETCC(Cond, DAG)) {
Cond = NewCond;
// If the condition was updated, it's possible that the operands of the
// select were also updated (for example, EmitTest has a RAUW). Refresh
// the local references to the select operands in case they got stale.
Op1 = Op.getOperand(1);
Op2 = Op.getOperand(2);
}
}
// (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
// (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
@@ -17193,22 +17286,26 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI())
return SDValue();
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) {
if (VT.is512BitVector() && InVTElt != MVT::i1) {
if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT)
return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0));
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
}
assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
assert (InVTElt == MVT::i1 && "Unexpected vector type");
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
SDValue NegOne = DAG.getConstant(
APInt::getAllOnesValue(ExtVT.getScalarSizeInBits()), dl, ExtVT);
SDValue Zero = DAG.getConstant(
APInt::getNullValue(ExtVT.getScalarSizeInBits()), dl, ExtVT);
SDValue V;
if (Subtarget.hasDQI()) {
V = DAG.getNode(X86ISD::VSEXT, dl, ExtVT, In);
assert(!VT.is512BitVector() && "Unexpected vector type");
} else {
SDValue NegOne = getOnesVector(ExtVT, Subtarget, DAG, dl);
SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, dl);
V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);
if (VT.is512BitVector())
return V;
}
SDValue V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);
if (VT.is512BitVector())
return V;
return DAG.getNode(X86ISD::VTRUNC, dl, VT, V);
}
@@ -21528,6 +21625,23 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7});
}
// It's worth extending once and using the vXi16/vXi32 shifts for smaller
// types, but without AVX512 the extra overheads to get from vXi8 to vXi32
// make the existing SSE solution better.
if ((Subtarget.hasInt256() && VT == MVT::v8i16) ||
(Subtarget.hasAVX512() && VT == MVT::v16i16) ||
(Subtarget.hasAVX512() && VT == MVT::v16i8) ||
(Subtarget.hasBWI() && VT == MVT::v32i8)) {
MVT EvtSVT = (VT == MVT::v32i8 ? MVT::i16 : MVT::i32);
MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements());
unsigned ExtOpc =
Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
R = DAG.getNode(ExtOpc, dl, ExtVT, R);
Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
}
if (VT == MVT::v16i8 ||
(VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP())) {
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
@@ -21636,19 +21750,6 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
}
}
// It's worth extending once and using the v8i32 shifts for 16-bit types, but
// the extra overheads to get from v16i8 to v8i32 make the existing SSE
// solution better.
if (Subtarget.hasInt256() && VT == MVT::v8i16) {
MVT ExtVT = MVT::v8i32;
unsigned ExtOpc =
Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
R = DAG.getNode(ExtOpc, dl, ExtVT, R);
Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
}
if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {
MVT ExtVT = MVT::v8i32;
SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
@@ -27763,29 +27864,32 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return SDValue();
}
/// \brief Try to combine a shuffle into a target-specific add-sub node.
/// Returns true iff the shuffle node \p N can be replaced with ADDSUB
/// operation. If true is returned then the operands of ADDSUB operation
/// are written to the parameters \p Opnd0 and \p Opnd1.
///
/// We combine this directly on the abstract vector shuffle nodes so it is
/// easier to generically match. We also insert dummy vector shuffle nodes for
/// the operands which explicitly discard the lanes which are unused by this
/// operation to try to flow through the rest of the combiner the fact that
/// they're unused.
static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(N);
/// We combine shuffle to ADDSUB directly on the abstract vector shuffle nodes
/// so it is easier to generically match. We also insert dummy vector shuffle
/// nodes for the operands which explicitly discard the lanes which are unused
/// by this operation to try to flow through the rest of the combiner
/// the fact that they're unused.
static bool isAddSub(SDNode *N, const X86Subtarget &Subtarget,
SDValue &Opnd0, SDValue &Opnd1) {
EVT VT = N->getValueType(0);
if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
(!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
return SDValue();
(!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&
(!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
return false;
// We only handle target-independent shuffles.
// FIXME: It would be easy and harmless to use the target shuffle mask
// extraction tool to support more.
if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
return SDValue();
return false;
ArrayRef<int> OrigMask = cast<ShuffleVectorSDNode>(N)->getMask();
SmallVector<int, 8> Mask(OrigMask.begin(), OrigMask.end());
SmallVector<int, 16> Mask(OrigMask.begin(), OrigMask.end());
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
@@ -27796,27 +27900,57 @@ static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget &Subtarget,
ShuffleVectorSDNode::commuteMask(Mask);
std::swap(V1, V2);
} else if (V1.getOpcode() != ISD::FSUB || V2.getOpcode() != ISD::FADD)
return SDValue();
return false;
// If there are other uses of these operations we can't fold them.
if (!V1->hasOneUse() || !V2->hasOneUse())
return SDValue();
return false;
// Ensure that both operations have the same operands. Note that we can
// commute the FADD operands.
SDValue LHS = V1->getOperand(0), RHS = V1->getOperand(1);
if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) &&
(V2->getOperand(0) != RHS || V2->getOperand(1) != LHS))
return SDValue();
return false;
// We're looking for blends between FADD and FSUB nodes. We insist on these
// nodes being lined up in a specific expected pattern.
if (!(isShuffleEquivalent(V1, V2, Mask, {0, 3}) ||
isShuffleEquivalent(V1, V2, Mask, {0, 5, 2, 7}) ||
isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15})))
isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15}) ||
isShuffleEquivalent(V1, V2, Mask, {0, 17, 2, 19, 4, 21, 6, 23,
8, 25, 10, 27, 12, 29, 14, 31})))
return false;
Opnd0 = LHS;
Opnd1 = RHS;
return true;
}
/// \brief Try to combine a shuffle into a target-specific add-sub or
/// mul-add-sub node.
static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue Opnd0, Opnd1;
if (!isAddSub(N, Subtarget, Opnd0, Opnd1))
return SDValue();
return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS);
EVT VT = N->getValueType(0);
SDLoc DL(N);
// Try to generate X86ISD::FMADDSUB node here.
SDValue Opnd2;
if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2))
return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
// Do not generate X86ISD::ADDSUB node for 512-bit types even though
// the ADDSUB idiom has been successfully recognized. There are no known
// X86 targets with 512-bit ADDSUB instructions!
if (VT.is512BitVector())
return SDValue();
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
}
// We are looking for a shuffle where both sources are concatenated with undef
@@ -27878,7 +28012,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
// If we have legalized the vector types, look for blends of FADD and FSUB
// nodes that we can fuse into an ADDSUB node.
if (TLI.isTypeLegal(VT))
if (SDValue AddSub = combineShuffleToAddSub(N, Subtarget, DAG))
if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))
return AddSub;
// During Type Legalization, when promoting illegal vector types,

View File

@@ -443,6 +443,22 @@ def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
[(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
(ins VK16WM:$mask), "",
[(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
(v16i32 immAllOnesV),
(v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
(ins VK8WM:$mask), "",
[(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
(bc_v8i64 (v16i32 immAllOnesV)),
(bc_v8i64 (v16i32 immAllZerosV))))]>;
}
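// Illustrative expansion of the pseudos above (a sketch, not spelled out in
// this patch): after pseudo expansion the mask select becomes a zero-masked
// VPTERNLOG with an all-ones truth table, reading one register for all three
// sources, e.g. roughly
//   vpternlogd zmm0 {k1} {z}, zmm0, zmm0, 0xff  ; -1 where k1 set, 0 elsewhere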
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, Predicates = [HasVLX], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
@@ -1064,10 +1080,10 @@ def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
(v8f32 VR256X:$src), 1)>;
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
(VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v4f64 VR256X:$src), 1)>;
(v4f64 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
(VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v4i64 VR256X:$src), 1)>;
(v4i64 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
(VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v8i32 VR256X:$src), 1)>;
@@ -1485,8 +1501,7 @@ defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd",
// AVX-512 - BLEND using mask
//
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
let hasSideEffects = 0 in
let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
@@ -1496,16 +1511,13 @@ multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
[(set _.RC:$dst, (vselect _.KRCWM:$mask,
(_.VT _.RC:$src2),
(_.VT _.RC:$src1)))]>, EVEX_4V, EVEX_K;
let hasSideEffects = 0 in
[]>, EVEX_4V, EVEX_K;
def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_KZ;
let mayLoad = 1, hasSideEffects = 0 in
let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
@@ -1515,38 +1527,32 @@ multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
[(set _.RC:$dst, (vselect _.KRCWM:$mask,
(_.VT (bitconvert (_.LdFrag addr:$src2))),
(_.VT _.RC:$src1)))]>,
EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>;
let mayLoad = 1, hasSideEffects = 0 in
[]>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>;
def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>;
}
}
}
multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
let mayLoad = 1, hasSideEffects = 0 in {
def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
[(set _.RC:$dst,(vselect _.KRCWM:$mask,
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
(_.VT _.RC:$src1)))]>,
EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
[]>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
let mayLoad = 1, hasSideEffects = 0 in
def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, "}"),
[]>, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
}
}
multiclass blendmask_dq <bits<8> opc, string OpcodeStr,
@@ -1582,21 +1588,6 @@ defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", avx512vl_i16_info>, VEX_W;
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
(v8f32 VR256X:$src2))),
(EXTRACT_SUBREG
(v16f32 (VBLENDMPSZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>;
def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
(v8i32 VR256X:$src2))),
(EXTRACT_SUBREG
(v16i32 (VPBLENDMDZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>;
}
//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//
@ -2735,7 +2726,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
(ins _.KRCWM:$mask, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
"${dst} {${mask}} {z}, $src}"),
[(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
[(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
(_.VT _.RC:$src),
_.ImmAllZerosV)))], _.ExeDomain>,
EVEX, EVEX_KZ;
@ -2972,6 +2963,30 @@ def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
(v16i32 VR512:$src))),
(VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
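// The 256-bit operands are inserted into the low half of implicitly-defined
// 512-bit registers, the masked 512-bit move performs the select, and the
// result is pulled back out of sub_ymm.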
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
(v8f32 VR256X:$src0))),
(EXTRACT_SUBREG
(v16f32
(VMOVAPSZrrk
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
(COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
sub_ymm)>;
def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
(v8i32 VR256X:$src0))),
(EXTRACT_SUBREG
(v16i32
(VMOVDQA32Zrrk
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
(COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
sub_ymm)>;
}
let Predicates = [HasVLX, NoBWI] in {
// 128-bit load/store without BWI.
def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
@ -3116,13 +3131,13 @@ let Predicates = [HasVLX] in {
(VMOVDQU32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
}
// Move Int Doubleword to Packed Double Int
//
let ExeDomain = SSEPackedInt in {
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
EVEX;
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
@ -3152,47 +3167,47 @@ def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src
def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
EVEX_CD8<64, CD8VT1>;
}
} // ExeDomain = SSEPackedInt
// Move Int Doubleword to Single Scalar
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set FR32X:$dst, (bitconvert GR32:$src))],
IIC_SSE_MOVDQ>, EVEX;
def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
// Move doubleword from xmm register to r/m32
//
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
// Move doubleword from xmm register to r/m32
//
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
(iPTR 0)))], IIC_SSE_MOVD_ToGP>,
EVEX;
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, VR128X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(store (i32 (extractelt (v4i32 VR128X:$src),
(iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
EVEX, EVEX_CD8<32, CD8VT1>;
} // ExeDomain = SSEPackedInt
// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
(iPTR 0)))],
IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
Requires<[HasAVX512, In64BitMode]>;
@ -3213,39 +3228,39 @@ def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
let hasSideEffects = 0 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src),
"vmovq.s\t{$src, $dst|$dst, $src}",[]>,
EVEX, VEX_W;
} // ExeDomain = SSEPackedInt
// Move Scalar Single to Double Int
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
(ins FR32X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32X:$src))],
IIC_SSE_MOVD_ToGP>, EVEX;
def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, FR32X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
// Move Quadword Int to Packed Quadword Int
//
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
(ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
} // ExeDomain = SSEPackedInt
//===----------------------------------------------------------------------===//
// AVX-512 MOVSS, MOVSD
"vmovd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
// Move Quadword Int to Packed Quadword Int
//
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
(ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
} // ExeDomain = SSEPackedInt
//===----------------------------------------------------------------------===//
// AVX-512 MOVSS, MOVSD
//===----------------------------------------------------------------------===//
multiclass avx512_move_scalar<string asm, SDNode OpNode,
@ -8646,6 +8661,28 @@ def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
(VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
(bitconvert (v4i32 immAllZerosV))),
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
(COPY_TO_REGCLASS FR64X:$src, VR128X))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
(bitconvert (v4i32 immAllZerosV))),
(VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
(bitconvert (v4i32 immAllZerosV))),
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
//===----------------------------------------------------------------------===//


@ -543,7 +543,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::MOV8rr, X86::MOV8rm, 0 },
{ X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 },
{ X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 },
{ X86::MOVDDUPrr, X86::MOVDDUPrm, 0 },
{ X86::MOVDDUPrr, X86::MOVDDUPrm, TB_NO_REVERSE },
{ X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
{ X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
{ X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 },
@ -661,7 +661,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
{ X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 },
{ X86::VMOVAPSrr, X86::VMOVAPSrm, TB_ALIGN_16 },
{ X86::VMOVDDUPrr, X86::VMOVDDUPrm, 0 },
{ X86::VMOVDDUPrr, X86::VMOVDDUPrm, TB_NO_REVERSE },
{ X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
{ X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 },
{ X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 },
@ -6864,6 +6864,21 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addReg(Reg, RegState::Undef).addImm(0xff);
return true;
}
case X86::AVX512_512_SEXT_MASK_32:
case X86::AVX512_512_SEXT_MASK_64: {
unsigned Reg = MIB->getOperand(0).getReg();
unsigned MaskReg = MIB->getOperand(1).getReg();
unsigned MaskState = getRegState(MIB->getOperand(1));
unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ?
X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz;
MI.RemoveOperand(1);
MIB->setDesc(get(Opc));
// VPTERNLOG needs 3 register inputs and an immediate.
// 0xff will return 1s for any input.
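// (The immediate encodes an 8-entry truth table over the three sources, so
// 0xff yields all-ones in every lane; the zero-masking form then clears
// lanes whose mask bit is 0, sign-extending the mask to -1/0 elements.)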
MIB.addReg(Reg, RegState::Undef).addReg(MaskReg, MaskState)
.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xff);
return true;
}
case X86::VMOVAPSZ128rm_NOVLX:
return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm),
get(X86::VBROADCASTF32X4rm), X86::sub_xmm);


@ -6397,7 +6397,7 @@ let Predicates = [HasAVX] in {
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround",
int_x86_sse41_round_ss,
int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG;
}
let Predicates = [UseAVX] in {


@ -144,6 +144,10 @@ int X86TTIImpl::getArithmeticInstrCost(
}
static const CostTblEntry AVX512BWUniformConstCostTable[] = {
{ ISD::SHL, MVT::v64i8, 2 }, // psllw + pand.
{ ISD::SRL, MVT::v64i8, 2 }, // psrlw + pand.
{ ISD::SRA, MVT::v64i8, 4 }, // psrlw, pand, pxor, psubb.
{ ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence
{ ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence
};
@ -168,6 +172,10 @@ int X86TTIImpl::getArithmeticInstrCost(
}
static const CostTblEntry AVX2UniformConstCostTable[] = {
{ ISD::SHL, MVT::v32i8, 2 }, // psllw + pand.
{ ISD::SRL, MVT::v32i8, 2 }, // psrlw + pand.
{ ISD::SRA, MVT::v32i8, 4 }, // psrlw, pand, pxor, psubb.
{ ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle.
{ ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
@ -184,6 +192,14 @@ int X86TTIImpl::getArithmeticInstrCost(
}
static const CostTblEntry SSE2UniformConstCostTable[] = {
{ ISD::SHL, MVT::v16i8, 2 }, // psllw + pand.
{ ISD::SRL, MVT::v16i8, 2 }, // psrlw + pand.
{ ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
{ ISD::SHL, MVT::v32i8, 4 }, // 2*(psllw + pand).
{ ISD::SRL, MVT::v32i8, 4 }, // 2*(psrlw + pand).
{ ISD::SRA, MVT::v32i8, 8 }, // 2*(psrlw, pand, pxor, psubb).
{ ISD::SDIV, MVT::v16i16, 12 }, // pmulhw sequence
{ ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
{ ISD::UDIV, MVT::v16i16, 12 }, // pmulhuw sequence
@ -207,6 +223,43 @@ int X86TTIImpl::getArithmeticInstrCost(
return LT.first * Entry->Cost;
}
static const CostTblEntry AVX2UniformCostTable[] = {
// Uniform splats are cheaper for the following instructions.
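// (A splatted amount can use the hardware shift-by-scalar forms, which take
// the count from the low bits of an XMM register.)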
{ ISD::SHL, MVT::v16i16, 1 }, // psllw.
{ ISD::SRL, MVT::v16i16, 1 }, // psrlw.
{ ISD::SRA, MVT::v16i16, 1 }, // psraw.
};
if (ST->hasAVX2() &&
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
if (const auto *Entry =
CostTableLookup(AVX2UniformCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
}
static const CostTblEntry SSE2UniformCostTable[] = {
// Uniform splats are cheaper for the following instructions.
{ ISD::SHL, MVT::v8i16, 1 }, // psllw.
{ ISD::SHL, MVT::v4i32, 1 }, // pslld
{ ISD::SHL, MVT::v2i64, 1 }, // psllq.
{ ISD::SRL, MVT::v8i16, 1 }, // psrlw.
{ ISD::SRL, MVT::v4i32, 1 }, // psrld.
{ ISD::SRL, MVT::v2i64, 1 }, // psrlq.
{ ISD::SRA, MVT::v8i16, 1 }, // psraw.
{ ISD::SRA, MVT::v4i32, 1 }, // psrad.
};
if (ST->hasSSE2() &&
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
if (const auto *Entry =
CostTableLookup(SSE2UniformCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
}
static const CostTblEntry AVX512DQCostTable[] = {
{ ISD::MUL, MVT::v2i64, 1 },
{ ISD::MUL, MVT::v4i64, 1 },
@ -219,6 +272,10 @@ int X86TTIImpl::getArithmeticInstrCost(
return LT.first * Entry->Cost;
static const CostTblEntry AVX512BWCostTable[] = {
{ ISD::SHL, MVT::v32i16, 1 }, // vpsllvw
{ ISD::SRL, MVT::v32i16, 1 }, // vpsrlvw
{ ISD::SRA, MVT::v32i16, 1 }, // vpsravw
{ ISD::MUL, MVT::v64i8, 11 }, // extend/pmullw/trunc sequence.
{ ISD::MUL, MVT::v32i8, 4 }, // extend/pmullw/trunc sequence.
{ ISD::MUL, MVT::v16i8, 4 }, // extend/pmullw/trunc sequence.
@ -259,7 +316,7 @@ int X86TTIImpl::getArithmeticInstrCost(
if (const auto *Entry = CostTableLookup(AVX512CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX2CostTable[] = {
static const CostTblEntry AVX2ShiftCostTable[] = {
// Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
// customize them to detect the cases where shift amount is a scalar one.
{ ISD::SHL, MVT::v4i32, 1 },
@ -283,11 +340,11 @@ int X86TTIImpl::getArithmeticInstrCost(
// is lowered into a vector multiply (vpmullw).
return LT.first;
if (const auto *Entry = CostTableLookup(AVX2CostTable, ISD, LT.second))
if (const auto *Entry = CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
}
static const CostTblEntry XOPCostTable[] = {
static const CostTblEntry XOPShiftCostTable[] = {
// 128bit shifts take 1cy, but right shifts require negation beforehand.
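// (XOP shifts take per-element signed counts and shift right for negative
// values, so a right shift needs one extra instruction to negate the
// amounts.)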
{ ISD::SHL, MVT::v16i8, 1 },
{ ISD::SRL, MVT::v16i8, 2 },
@ -318,93 +375,20 @@ int X86TTIImpl::getArithmeticInstrCost(
// Look for XOP lowering tricks.
if (ST->hasXOP())
if (const auto *Entry = CostTableLookup(XOPCostTable, ISD, LT.second))
if (const auto *Entry = CostTableLookup(XOPShiftCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX2CustomCostTable[] = {
{ ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence.
{ ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
{ ISD::SRL, MVT::v32i8, 11 }, // vpblendvb sequence.
{ ISD::SRL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
{ ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence.
{ ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence.
{ ISD::SRA, MVT::v2i64, 4 }, // srl/xor/sub sequence.
{ ISD::SRA, MVT::v4i64, 4 }, // srl/xor/sub sequence.
{ ISD::MUL, MVT::v32i8, 17 }, // extend/pmullw/trunc sequence.
{ ISD::MUL, MVT::v16i8, 7 }, // extend/pmullw/trunc sequence.
{ ISD::MUL, MVT::v8i32, 1 }, // pmulld
{ ISD::MUL, MVT::v4i64, 8 }, // 3*pmuludq/3*shift/2*add
{ ISD::FDIV, MVT::f32, 7 }, // Haswell from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
{ ISD::FDIV, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/
{ ISD::FDIV, MVT::f64, 14 }, // Haswell from http://www.agner.org/
{ ISD::FDIV, MVT::v2f64, 14 }, // Haswell from http://www.agner.org/
{ ISD::FDIV, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/
};
// Look for AVX2 lowering tricks for custom cases.
if (ST->hasAVX2())
if (const auto *Entry = CostTableLookup(AVX2CustomCostTable, ISD,
LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVXCustomCostTable[] = {
{ ISD::MUL, MVT::v32i8, 26 }, // extend/pmullw/trunc sequence.
{ ISD::FDIV, MVT::f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FDIV, MVT::v8f32, 28 }, // SNB from http://www.agner.org/
{ ISD::FDIV, MVT::f64, 22 }, // SNB from http://www.agner.org/
{ ISD::FDIV, MVT::v2f64, 22 }, // SNB from http://www.agner.org/
{ ISD::FDIV, MVT::v4f64, 44 }, // SNB from http://www.agner.org/
// Vectorizing division is a bad idea. See the SSE2 table for more comments.
{ ISD::SDIV, MVT::v32i8, 32*20 },
{ ISD::SDIV, MVT::v16i16, 16*20 },
{ ISD::SDIV, MVT::v8i32, 8*20 },
{ ISD::SDIV, MVT::v4i64, 4*20 },
{ ISD::UDIV, MVT::v32i8, 32*20 },
{ ISD::UDIV, MVT::v16i16, 16*20 },
{ ISD::UDIV, MVT::v8i32, 8*20 },
{ ISD::UDIV, MVT::v4i64, 4*20 },
};
// Look for AVX2 lowering tricks for custom cases.
if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVXCustomCostTable, ISD,
LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry
SSE2UniformCostTable[] = {
static const CostTblEntry SSE2UniformShiftCostTable[] = {
// Uniform splats are cheaper for the following instructions.
{ ISD::SHL, MVT::v16i8, 1 }, // psllw.
{ ISD::SHL, MVT::v32i8, 2 }, // psllw.
{ ISD::SHL, MVT::v8i16, 1 }, // psllw.
{ ISD::SHL, MVT::v16i16, 2 }, // psllw.
{ ISD::SHL, MVT::v4i32, 1 }, // pslld
{ ISD::SHL, MVT::v8i32, 2 }, // pslld
{ ISD::SHL, MVT::v2i64, 1 }, // psllq.
{ ISD::SHL, MVT::v4i64, 2 }, // psllq.
{ ISD::SRL, MVT::v16i8, 1 }, // psrlw.
{ ISD::SRL, MVT::v32i8, 2 }, // psrlw.
{ ISD::SRL, MVT::v8i16, 1 }, // psrlw.
{ ISD::SRL, MVT::v16i16, 2 }, // psrlw.
{ ISD::SRL, MVT::v4i32, 1 }, // psrld.
{ ISD::SRL, MVT::v8i32, 2 }, // psrld.
{ ISD::SRL, MVT::v2i64, 1 }, // psrlq.
{ ISD::SRL, MVT::v4i64, 2 }, // psrlq.
{ ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
{ ISD::SRA, MVT::v32i8, 8 }, // psrlw, pand, pxor, psubb.
{ ISD::SRA, MVT::v8i16, 1 }, // psraw.
{ ISD::SRA, MVT::v16i16, 2 }, // psraw.
{ ISD::SRA, MVT::v4i32, 1 }, // psrad.
{ ISD::SRA, MVT::v8i32, 2 }, // psrad.
{ ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle.
{ ISD::SRA, MVT::v4i64, 8 }, // 2 x psrad + shuffle.
@ -414,7 +398,7 @@ int X86TTIImpl::getArithmeticInstrCost(
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
if (const auto *Entry =
CostTableLookup(SSE2UniformCostTable, ISD, LT.second))
CostTableLookup(SSE2UniformShiftCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
}
@ -422,24 +406,98 @@ int X86TTIImpl::getArithmeticInstrCost(
Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
MVT VT = LT.second;
// Vector shift left by non uniform constant can be lowered
// into vector multiply (pmullw/pmulld).
if ((VT == MVT::v8i16 && ST->hasSSE2()) ||
(VT == MVT::v4i32 && ST->hasSSE41()))
return LT.first;
// v16i16 and v8i32 shifts by non-uniform constants are lowered into a
// sequence of extract + two vector multiply + insert.
if ((VT == MVT::v8i32 || VT == MVT::v16i16) &&
(ST->hasAVX() && !ST->hasAVX2()))
ISD = ISD::MUL;
// A vector shift left by non uniform constant is converted
// into a vector multiply; the new multiply is eventually
// lowered into a sequence of shuffles and 2 x pmuludq.
if (VT == MVT::v4i32 && ST->hasSSE2())
// into vector multiply.
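// (A left shift by a constant vector is a multiply by the matching powers
// of two, e.g. shl by <0,1,2,3> is mul by <1,2,4,8>.)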
if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->hasSSE2()) ||
((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->hasAVX()))
ISD = ISD::MUL;
}
static const CostTblEntry AVX2CostTable[] = {
{ ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence.
{ ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
{ ISD::SRL, MVT::v32i8, 11 }, // vpblendvb sequence.
{ ISD::SRL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
{ ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence.
{ ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence.
{ ISD::SRA, MVT::v2i64, 4 }, // srl/xor/sub sequence.
{ ISD::SRA, MVT::v4i64, 4 }, // srl/xor/sub sequence.
{ ISD::SUB, MVT::v32i8, 1 }, // psubb
{ ISD::ADD, MVT::v32i8, 1 }, // paddb
{ ISD::SUB, MVT::v16i16, 1 }, // psubw
{ ISD::ADD, MVT::v16i16, 1 }, // paddw
{ ISD::SUB, MVT::v8i32, 1 }, // psubd
{ ISD::ADD, MVT::v8i32, 1 }, // paddd
{ ISD::SUB, MVT::v4i64, 1 }, // psubq
{ ISD::ADD, MVT::v4i64, 1 }, // paddq
{ ISD::MUL, MVT::v32i8, 17 }, // extend/pmullw/trunc sequence.
{ ISD::MUL, MVT::v16i8, 7 }, // extend/pmullw/trunc sequence.
{ ISD::MUL, MVT::v16i16, 1 }, // pmullw
{ ISD::MUL, MVT::v8i32, 1 }, // pmulld
{ ISD::MUL, MVT::v4i64, 8 }, // 3*pmuludq/3*shift/2*add
{ ISD::FDIV, MVT::f32, 7 }, // Haswell from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
{ ISD::FDIV, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/
{ ISD::FDIV, MVT::f64, 14 }, // Haswell from http://www.agner.org/
{ ISD::FDIV, MVT::v2f64, 14 }, // Haswell from http://www.agner.org/
{ ISD::FDIV, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/
};
// Look for AVX2 lowering tricks for custom cases.
if (ST->hasAVX2())
if (const auto *Entry = CostTableLookup(AVX2CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX1CostTable[] = {
// We don't have to scalarize unsupported ops. We can issue two half-sized
// operations and we only need to extract the upper YMM half.
// Two ops + 1 extract + 1 insert = 4.
{ ISD::MUL, MVT::v16i16, 4 },
{ ISD::MUL, MVT::v8i32, 4 },
{ ISD::SUB, MVT::v32i8, 4 },
{ ISD::ADD, MVT::v32i8, 4 },
{ ISD::SUB, MVT::v16i16, 4 },
{ ISD::ADD, MVT::v16i16, 4 },
{ ISD::SUB, MVT::v8i32, 4 },
{ ISD::ADD, MVT::v8i32, 4 },
{ ISD::SUB, MVT::v4i64, 4 },
{ ISD::ADD, MVT::v4i64, 4 },
// A v4i64 multiply is custom lowered as two split v2i64 vectors that then
// are lowered as a series of long multiplies(3), shifts(3) and adds(2)
// Because we believe v4i64 to be a legal type, we must also include the
// extract+insert in the cost table. Therefore, the cost here is 18
// instead of 8.
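// (2 x (3 pmuludq + 3 shifts + 2 adds) = 16, plus the extract and insert.)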
{ ISD::MUL, MVT::v4i64, 18 },
{ ISD::MUL, MVT::v32i8, 26 }, // extend/pmullw/trunc sequence.
{ ISD::FDIV, MVT::f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FDIV, MVT::v8f32, 28 }, // SNB from http://www.agner.org/
{ ISD::FDIV, MVT::f64, 22 }, // SNB from http://www.agner.org/
{ ISD::FDIV, MVT::v2f64, 22 }, // SNB from http://www.agner.org/
{ ISD::FDIV, MVT::v4f64, 44 }, // SNB from http://www.agner.org/
// Vectorizing division is a bad idea. See the SSE2 table for more comments.
{ ISD::SDIV, MVT::v32i8, 32*20 },
{ ISD::SDIV, MVT::v16i16, 16*20 },
{ ISD::SDIV, MVT::v8i32, 8*20 },
{ ISD::SDIV, MVT::v4i64, 4*20 },
{ ISD::UDIV, MVT::v32i8, 32*20 },
{ ISD::UDIV, MVT::v16i16, 16*20 },
{ ISD::UDIV, MVT::v8i32, 8*20 },
{ ISD::UDIV, MVT::v4i64, 4*20 },
};
if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSE42CostTable[] = {
{ ISD::FDIV, MVT::f32, 14 }, // Nehalem from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 14 }, // Nehalem from http://www.agner.org/
@ -456,6 +514,8 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SHL, MVT::v32i8, 2*11 }, // pblendvb sequence.
{ ISD::SHL, MVT::v8i16, 14 }, // pblendvb sequence.
{ ISD::SHL, MVT::v16i16, 2*14 }, // pblendvb sequence.
{ ISD::SHL, MVT::v4i32, 4 }, // pslld/paddd/cvttps2dq/pmulld
{ ISD::SHL, MVT::v8i32, 2*4 }, // pslld/paddd/cvttps2dq/pmulld
{ ISD::SRL, MVT::v16i8, 12 }, // pblendvb sequence.
{ ISD::SRL, MVT::v32i8, 2*12 }, // pblendvb sequence.
@ -501,6 +561,7 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence.
{ ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence.
{ ISD::MUL, MVT::v8i16, 1 }, // pmullw
{ ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle
{ ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add
@ -516,46 +577,19 @@ int X86TTIImpl::getArithmeticInstrCost(
// generally a bad idea. Assume somewhat arbitrarily that we have to be able
// to hide "20 cycles" for each lane.
{ ISD::SDIV, MVT::v16i8, 16*20 },
{ ISD::SDIV, MVT::v8i16, 8*20 },
{ ISD::SDIV, MVT::v4i32, 4*20 },
{ ISD::SDIV, MVT::v2i64, 2*20 },
{ ISD::UDIV, MVT::v16i8, 16*20 },
{ ISD::UDIV, MVT::v8i16, 8*20 },
{ ISD::UDIV, MVT::v4i32, 4*20 },
{ ISD::UDIV, MVT::v2i64, 2*20 },
};
if (ST->hasSSE2())
if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX1CostTable[] = {
// We don't have to scalarize unsupported ops. We can issue two half-sized
// operations and we only need to extract the upper YMM half.
// Two ops + 1 extract + 1 insert = 4.
{ ISD::MUL, MVT::v16i16, 4 },
{ ISD::MUL, MVT::v8i32, 4 },
{ ISD::SUB, MVT::v32i8, 4 },
{ ISD::ADD, MVT::v32i8, 4 },
{ ISD::SUB, MVT::v16i16, 4 },
{ ISD::ADD, MVT::v16i16, 4 },
{ ISD::SUB, MVT::v8i32, 4 },
{ ISD::ADD, MVT::v8i32, 4 },
{ ISD::SUB, MVT::v4i64, 4 },
{ ISD::ADD, MVT::v4i64, 4 },
// A v4i64 multiply is custom lowered as two split v2i64 vectors that then
// are lowered as a series of long multiplies(3), shifts(3) and adds(2)
// Because we believe v4i64 to be a legal type, we must also include the
// extract+insert in the cost table. Therefore, the cost here is 18
// instead of 8.
{ ISD::MUL, MVT::v4i64, 18 },
};
// Look for AVX1 lowering tricks.
if (ST->hasAVX() && !ST->hasAVX2())
if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSE1CostTable[] = {
{ ISD::FDIV, MVT::f32, 17 }, // Pentium III from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/
@ -639,8 +673,7 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
{ TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
{ TTI::SK_Reverse, MVT::v64i8, 6 }, // vextracti64x4 + 2*vperm2i128
// + 2*pshufb + vinserti64x4
{ TTI::SK_Reverse, MVT::v64i8, 2 }, // pshufb + vshufi64x2
{ TTI::SK_PermuteSingleSrc, MVT::v32i16, 1 }, // vpermw
{ TTI::SK_PermuteSingleSrc, MVT::v16i16, 1 }, // vpermw


@ -42,6 +42,8 @@
using namespace llvm;
using namespace lowertypetests;
using SummaryAction = LowerTypeTestsSummaryAction;
#define DEBUG_TYPE "lowertypetests"
STATISTIC(ByteArraySizeBits, "Byte array size in bits");
@ -55,9 +57,15 @@ static cl::opt<bool> AvoidReuse(
cl::desc("Try to avoid reuse of byte array addresses using aliases"),
cl::Hidden, cl::init(true));
static cl::opt<std::string> ClSummaryAction(
static cl::opt<SummaryAction> ClSummaryAction(
"lowertypetests-summary-action",
cl::desc("What to do with the summary when running this pass"), cl::Hidden);
cl::desc("What to do with the summary when running this pass"),
cl::values(clEnumValN(SummaryAction::None, "none", "Do nothing"),
clEnumValN(SummaryAction::Import, "import",
"Import typeid resolutions from summary and globals"),
clEnumValN(SummaryAction::Export, "export",
"Export typeid resolutions to summary and globals")),
cl::Hidden);
static cl::opt<std::string> ClReadSummary(
"lowertypetests-read-summary",
@ -226,8 +234,8 @@ class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> {
class LowerTypeTestsModule {
Module &M;
// This is for testing purposes only.
std::unique_ptr<ModuleSummaryIndex> OwnedSummary;
SummaryAction Action;
ModuleSummaryIndex *Summary;
bool LinkerSubsectionsViaSymbols;
Triple::ArchType Arch;
@ -319,21 +327,38 @@ class LowerTypeTestsModule {
void createJumpTable(Function *F, ArrayRef<GlobalTypeMember *> Functions);
public:
LowerTypeTestsModule(Module &M);
~LowerTypeTestsModule();
LowerTypeTestsModule(Module &M, SummaryAction Action,
ModuleSummaryIndex *Summary);
bool lower();
// Lower the module using the action and summary passed as command line
// arguments. For testing purposes only.
static bool runForTesting(Module &M);
};
struct LowerTypeTests : public ModulePass {
static char ID;
LowerTypeTests() : ModulePass(ID) {
bool UseCommandLine = false;
SummaryAction Action;
ModuleSummaryIndex *Summary;
LowerTypeTests() : ModulePass(ID), UseCommandLine(true) {
initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
}
LowerTypeTests(SummaryAction Action, ModuleSummaryIndex *Summary)
: ModulePass(ID), Action(Action), Summary(Summary) {
initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
}
bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
return LowerTypeTestsModule(M).lower();
if (UseCommandLine)
return LowerTypeTestsModule::runForTesting(M);
return LowerTypeTestsModule(M, Action, Summary).lower();
}
};
@ -343,7 +368,10 @@ INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false,
false)
char LowerTypeTests::ID = 0;
ModulePass *llvm::createLowerTypeTestsPass() { return new LowerTypeTests; }
ModulePass *llvm::createLowerTypeTestsPass(SummaryAction Action,
ModuleSummaryIndex *Summary) {
return new LowerTypeTests(Action, Summary);
}
/// Build a bit set for TypeId using the object layouts in
/// GlobalLayout.
@ -1145,22 +1173,12 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
}
/// Lower all type tests in this module.
LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) {
// Handle the command-line summary arguments. This code is for testing
// purposes only, so we handle errors directly.
if (!ClSummaryAction.empty()) {
OwnedSummary = make_unique<ModuleSummaryIndex>();
if (!ClReadSummary.empty()) {
ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary +
": ");
auto ReadSummaryFile =
ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));
yaml::Input In(ReadSummaryFile->getBuffer());
In >> *OwnedSummary;
ExitOnErr(errorCodeToError(In.error()));
}
}
LowerTypeTestsModule::LowerTypeTestsModule(Module &M, SummaryAction Action,
ModuleSummaryIndex *Summary)
: M(M), Action(Action), Summary(Summary) {
// FIXME: Use these fields.
(void)this->Action;
(void)this->Summary;
Triple TargetTriple(M.getTargetTriple());
LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
@ -1169,18 +1187,36 @@ LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) {
ObjectFormat = TargetTriple.getObjectFormat();
}
LowerTypeTestsModule::~LowerTypeTestsModule() {
if (ClSummaryAction.empty() || ClWriteSummary.empty())
return;
bool LowerTypeTestsModule::runForTesting(Module &M) {
ModuleSummaryIndex Summary;
ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
": ");
std::error_code EC;
raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
ExitOnErr(errorCodeToError(EC));
// Handle the command-line summary arguments. This code is for testing
// purposes only, so we handle errors directly.
if (!ClReadSummary.empty()) {
ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary +
": ");
auto ReadSummaryFile =
ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));
yaml::Output Out(OS);
Out << *OwnedSummary;
yaml::Input In(ReadSummaryFile->getBuffer());
In >> Summary;
ExitOnErr(errorCodeToError(In.error()));
}
bool Changed = LowerTypeTestsModule(M, ClSummaryAction, &Summary).lower();
if (!ClWriteSummary.empty()) {
ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
": ");
std::error_code EC;
raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
ExitOnErr(errorCodeToError(EC));
yaml::Output Out(OS);
Out << Summary;
}
return Changed;
}
bool LowerTypeTestsModule::lower() {
@ -1313,7 +1349,8 @@ bool LowerTypeTestsModule::lower() {
PreservedAnalyses LowerTypeTestsPass::run(Module &M,
ModuleAnalysisManager &AM) {
bool Changed = LowerTypeTestsModule(M).lower();
bool Changed =
LowerTypeTestsModule(M, SummaryAction::None, /*Summary=*/nullptr).lower();
if (!Changed)
return PreservedAnalyses::all();
return PreservedAnalyses::none();


@ -857,7 +857,8 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
// Lower type metadata and the type.test intrinsic. This pass supports Clang's
// control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at
// link time if CFI is enabled. The pass does nothing if CFI is disabled.
PM.add(createLowerTypeTestsPass());
PM.add(createLowerTypeTestsPass(LowerTypeTestsSummaryAction::None,
/*Summary=*/nullptr));
if (OptLevel != 0)
addLateLTOOptimizationPasses(PM);


@ -1903,7 +1903,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
return foldICmpShlOne(Cmp, Shl, C);
// Check that the shift amount is in range. If not, don't perform undefined
// shifts. When the shift is visited it will be simplified.
// shifts. When the shift is visited, it will be simplified.
unsigned TypeBits = C->getBitWidth();
if (ShiftAmt->uge(TypeBits))
return nullptr;
@ -1923,7 +1923,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
return new ICmpInst(Pred, X, LShrC);
if (Shl->hasOneUse()) {
// Otherwise strength reduce the shift into an and.
// Otherwise, strength reduce the shift into an and.
Constant *Mask = ConstantInt::get(Shl->getType(),
APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue()));
@ -1951,7 +1951,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
}
// When the shift is nuw and pred is >u or <=u, comparison only really happens
// in the pre-shifted bits. Since InstSimplify canoncalizes <=u into <u, the
// in the pre-shifted bits. Since InstSimplify canonicalizes <=u into <u, the
// <=u case can be further converted to match <u (see below).
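// For example, given nuw, (X << 3) >u C is equivalent to X >u (C >> 3),
// since no set bits are shifted out.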
if (Shl->hasNoUnsignedWrap() &&
(Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT)) {
@ -1970,9 +1970,9 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
// Transform (icmp pred iM (shl iM %v, N), C)
// -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (C>>N))
// Transform the shl to a trunc if (trunc (C>>N)) has no loss and M-N.
// This enables us to get rid of the shift in favor of a trunc which can be
// This enables us to get rid of the shift in favor of a trunc that may be
// free on the target. It has the additional benefit of comparing to a
// smaller constant, which will be target friendly.
// smaller constant that may be more target-friendly.
unsigned Amt = ShiftAmt->getLimitedValue(TypeBits - 1);
if (Shl->hasOneUse() && Amt != 0 && C->countTrailingZeros() >= Amt &&
DL.isLegalInteger(TypeBits - Amt)) {


@ -1818,6 +1818,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
RegisteredFlag = new GlobalVariable(
M, IntptrTy, false, GlobalVariable::CommonLinkage,
ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName);
RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility);
// Update llvm.compiler.used, adding the new liveness globals. This is
// needed so that during LTO these variables stay alive. The alternative


@ -1423,7 +1423,7 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
if (widenLoopCompare(DU))
return nullptr;
// This user does not evaluate to a recurence after widening, so don't
// This user does not evaluate to a recurrence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
truncateIVUse(DU, DT, LI);


@ -415,7 +415,9 @@ class LoadEliminationForLoop {
Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
PH->getTerminator());
Value *Initial =
new LoadInst(InitialPtr, "load_initial", PH->getTerminator());
new LoadInst(InitialPtr, "load_initial", /* isVolatile */ false,
Cand.Load->getAlignment(), PH->getTerminator());
PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
&L->getHeader()->front());
PHI->addIncoming(Initial, PH);


@ -1382,8 +1382,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
Pred->getInstList().splice(BI->getIterator(), Succ->getInstList(),
Succ->begin(), Succ->end());
LPM->deleteSimpleAnalysisValue(BI, L);
RemoveFromWorklist(BI, Worklist);
BI->eraseFromParent();
// Remove Succ from the loop tree.
LI->removeBlock(Succ);


@ -79,7 +79,8 @@ STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted");
STATISTIC(NumGVNBlocksDeleted, "Number of blocks deleted");
STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified");
STATISTIC(NumGVNPhisAllSame, "Number of PHIs whose arguments are all the same");
STATISTIC(NumGVNMaxIterations, "Maximum Number of iterations it took to converge GVN");
STATISTIC(NumGVNMaxIterations,
"Maximum Number of iterations it took to converge GVN");
//===----------------------------------------------------------------------===//
// GVN Pass
@ -327,7 +328,7 @@ class NewGVN : public FunctionPass {
// Elimination.
struct ValueDFS;
void convertDenseToDFSOrdered(CongruenceClass::MemberSet &,
std::vector<ValueDFS> &);
SmallVectorImpl<ValueDFS> &);
bool eliminateInstructions(Function &);
void replaceInstruction(Instruction *, Value *);
@ -336,8 +337,11 @@ class NewGVN : public FunctionPass {
// New instruction creation.
void handleNewInstruction(Instruction *){};
// Various instruction touch utilities
void markUsersTouched(Value *);
void markMemoryUsersTouched(MemoryAccess *);
void markLeaderChangeTouched(CongruenceClass *CC);
// Utilities.
void cleanupTables();
@ -390,10 +394,10 @@ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(NewGVN, "newgvn", "Global Value Numbering", false, false)
PHIExpression *NewGVN::createPHIExpression(Instruction *I) {
BasicBlock *PhiBlock = I->getParent();
BasicBlock *PHIBlock = I->getParent();
auto *PN = cast<PHINode>(I);
auto *E = new (ExpressionAllocator)
PHIExpression(PN->getNumOperands(), I->getParent());
auto *E =
new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock);
E->allocateOperands(ArgRecycler, ExpressionAllocator);
E->setType(I->getType());
@ -408,10 +412,10 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I) {
std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
[&](const Use &U) -> Value * {
// Don't try to transform self-defined phis
// Don't try to transform self-defined phis.
if (U == PN)
return PN;
const BasicBlockEdge BBE(PN->getIncomingBlock(U), PhiBlock);
const BasicBlockEdge BBE(PN->getIncomingBlock(U), PHIBlock);
return lookupOperandLeader(U, I, BBE);
});
return E;
@ -710,6 +714,15 @@ const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI,
return E;
}
// Utility function to check whether the congruence class has a member other
// than the given instruction.
bool hasMemberOtherThanUs(const CongruenceClass *CC, Instruction *I) {
// Either it has more than one member, in which case it must contain something
// other than us (because it's indexed by value), or, if it has only one
// member right now, that member must not be us.
return CC->Members.size() > 1 || CC->Members.count(I) == 0;
}
const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I,
const BasicBlock *B) {
// Unlike loads, we never try to eliminate stores, so we do not check if they
@ -725,8 +738,12 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I,
cast<MemoryDef>(StoreAccess)->getDefiningAccess());
const Expression *OldStore = createStoreExpression(SI, StoreRHS, B);
CongruenceClass *CC = ExpressionToClass.lookup(OldStore);
// Basically, check if the congruence class the store is in is defined by a
// store that isn't us, and has the same value. MemorySSA takes care of
// ensuring the store has the same memory state as us already.
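// (The hasMemberOtherThanUs check below keeps a store from matching a class
// whose only remaining member is the store itself.)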
if (CC && CC->DefiningExpr && isa<StoreExpression>(CC->DefiningExpr) &&
CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B))
CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B) &&
hasMemberOtherThanUs(CC, I))
return createStoreExpression(SI, StoreRHS, B);
}
@ -810,36 +827,50 @@ bool NewGVN::setMemoryAccessEquivTo(MemoryAccess *From, MemoryAccess *To) {
const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I,
const BasicBlock *B) {
auto *E = cast<PHIExpression>(createPHIExpression(I));
if (E->op_empty()) {
// We match the semantics of SimplifyPhiNode from InstructionSimplify here.
// See if all arguments are the same.
// We track if any were undef because they need special handling.
bool HasUndef = false;
auto Filtered = make_filter_range(E->operands(), [&](const Value *Arg) {
if (Arg == I)
return false;
if (isa<UndefValue>(Arg)) {
HasUndef = true;
return false;
}
return true;
});
// If we are left with no operands, it's undef
if (Filtered.begin() == Filtered.end()) {
DEBUG(dbgs() << "Simplified PHI node " << *I << " to undef"
<< "\n");
E->deallocateOperands(ArgRecycler);
ExpressionAllocator.Deallocate(E);
return createConstantExpression(UndefValue::get(I->getType()));
}
Value *AllSameValue = E->getOperand(0);
// See if all arguments are the same, ignoring undef arguments, because we can
// choose a value that is the same for them.
for (const Value *Arg : E->operands())
if (Arg != AllSameValue && !isa<UndefValue>(Arg)) {
AllSameValue = nullptr;
break;
Value *AllSameValue = *(Filtered.begin());
++Filtered.begin();
// Can't use std::equal here, sadly, because filter.begin moves.
if (llvm::all_of(Filtered, [AllSameValue](const Value *V) {
return V == AllSameValue;
})) {
// In LLVM's non-standard representation of phi nodes, it's possible to have
// phi nodes with cycles (i.e., dependent on other phis that are in turn
// dependent on the original phi node), especially in weird CFGs where some arguments
// are unreachable, or uninitialized along certain paths. This can cause
// infinite loops during evaluation. We work around this by not trying to
// really evaluate them independently, but instead using a variable
// expression to say if one is equivalent to the other.
// We also special case undef, so that if we have an undef, we can't use the
// common value unless it dominates the phi block.
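// E.g., phi(%a, undef) can only be simplified to %a when %a dominates the
// phi block; otherwise %a need not be available where the phi lives.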
if (HasUndef) {
// Only have to check for instructions
if (auto *AllSameInst = dyn_cast<Instruction>(AllSameValue))
if (!DT->dominates(AllSameInst, I))
return E;
}
if (AllSameValue) {
// It's possible to have phi nodes with cycles (IE dependent on
// other phis that are .... dependent on the original phi node),
// especially in weird CFG's where some arguments are unreachable, or
// uninitialized along certain paths.
// This can cause infinite loops during evaluation (even if you disable
// the recursion below, you will simply ping-pong between congruence
// classes). If a phi node symbolically evaluates to another phi node,
// just leave it alone. If they are really the same, we will still
// eliminate them in favor of each other.
if (isa<PHINode>(AllSameValue))
return E;
NumGVNPhisAllSame++;
DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue
<< "\n");
@ -1007,12 +1038,22 @@ void NewGVN::markMemoryUsersTouched(MemoryAccess *MA) {
}
}
// Touch the instructions that need to be updated after a congruence class has a
// leader change, and mark changed values.
void NewGVN::markLeaderChangeTouched(CongruenceClass *CC) {
for (auto M : CC->Members) {
if (auto *I = dyn_cast<Instruction>(M))
TouchedInstructions.set(InstrDFS[I]);
ChangedValues.insert(M);
}
}
// Perform congruence finding on a given value numbering expression.
void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
ValueToExpression[V] = E;
// This is guaranteed to return something, since it will at least find
// INITIAL.
CongruenceClass *VClass = ValueToClass[V];
assert(VClass && "Should have found a vclass");
// Dead classes should have been eliminated from the mapping.
@ -1031,14 +1072,17 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
place->second = NewClass;
// Constants and variables should always be made the leader.
if (const auto *CE = dyn_cast<ConstantExpression>(E))
if (const auto *CE = dyn_cast<ConstantExpression>(E)) {
NewClass->RepLeader = CE->getConstantValue();
else if (const auto *VE = dyn_cast<VariableExpression>(E))
NewClass->RepLeader = VE->getVariableValue();
else if (const auto *SE = dyn_cast<StoreExpression>(E))
NewClass->RepLeader = SE->getStoreInst()->getValueOperand();
else
} else if (const auto *SE = dyn_cast<StoreExpression>(E)) {
StoreInst *SI = SE->getStoreInst();
NewClass->RepLeader =
lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent());
} else {
NewClass->RepLeader = V;
}
assert(!isa<VariableExpression>(E) &&
"VariableExpression should have been handled already");
EClass = NewClass;
DEBUG(dbgs() << "Created new congruence class for " << *V
@ -1077,14 +1121,11 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
ExpressionToClass.erase(VClass->DefiningExpr);
}
} else if (VClass->RepLeader == V) {
// FIXME: When the leader changes, the value numbering of
// everything may change, so we need to reprocess.
// When the leader changes, the value numbering of
// everything may change due to symbolization changes, so we need to
// reprocess.
VClass->RepLeader = *(VClass->Members.begin());
for (auto M : VClass->Members) {
if (auto *I = dyn_cast<Instruction>(M))
TouchedInstructions.set(InstrDFS[I]);
ChangedValues.insert(M);
}
markLeaderChangeTouched(VClass);
}
}
@ -1106,6 +1147,27 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
markMemoryUsersTouched(MA);
}
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(V)) {
// There is, sadly, one complicating thing for stores. Stores do not
// produce values, only consume them. However, in order to make loads and
// stores value number the same, we ignore the value operand of the store.
// But the value operand will still be the leader of our class, and thus, it
// may change. Because the store is a use, the store will get reprocessed,
// but nothing will change about it, and so nothing above will catch it
// (since the class will not change). In order to make sure everything ends
// up okay, we need to recheck the leader of the class. Since stores of
// different values value number differently due to different memorydefs, we
// are guaranteed the leader is always the same between stores in the same
// class.
DEBUG(dbgs() << "Checking store leader\n");
auto ProperLeader =
lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent());
if (EClass->RepLeader != ProperLeader) {
DEBUG(dbgs() << "Store leader changed, fixing\n");
EClass->RepLeader = ProperLeader;
markLeaderChangeTouched(EClass);
markMemoryUsersTouched(MSSA->getMemoryAccess(SI));
}
}
}
@ -1708,8 +1770,9 @@ struct NewGVN::ValueDFS {
}
};
void NewGVN::convertDenseToDFSOrdered(CongruenceClass::MemberSet &Dense,
std::vector<ValueDFS> &DFSOrderedSet) {
void NewGVN::convertDenseToDFSOrdered(
CongruenceClass::MemberSet &Dense,
SmallVectorImpl<ValueDFS> &DFSOrderedSet) {
for (auto D : Dense) {
// First add the value.
BasicBlock *BB = getBlockForValue(D);
@ -1972,21 +2035,25 @@ bool NewGVN::eliminateInstructions(Function &F) {
ValueDFSStack EliminationStack;
// Convert the members to DFS ordered sets and then merge them.
std::vector<ValueDFS> DFSOrderedSet;
SmallVector<ValueDFS, 8> DFSOrderedSet;
convertDenseToDFSOrdered(CC->Members, DFSOrderedSet);
// Sort the whole thing.
sort(DFSOrderedSet.begin(), DFSOrderedSet.end());
std::sort(DFSOrderedSet.begin(), DFSOrderedSet.end());
for (auto &C : DFSOrderedSet) {
int MemberDFSIn = C.DFSIn;
int MemberDFSOut = C.DFSOut;
Value *Member = C.Val;
Use *MemberUse = C.U;
for (auto &VD : DFSOrderedSet) {
int MemberDFSIn = VD.DFSIn;
int MemberDFSOut = VD.DFSOut;
Value *Member = VD.Val;
Use *MemberUse = VD.U;
// We ignore void things because we can't get a value from them.
if (Member && Member->getType()->isVoidTy())
continue;
if (Member) {
// We ignore void things because we can't get a value from them.
// FIXME: We could actually use this to kill dead stores that are
// dominated by equivalent earlier stores.
if (Member->getType()->isVoidTy())
continue;
}
if (EliminationStack.empty()) {
DEBUG(dbgs() << "Elimination Stack is empty\n");
@ -1995,8 +2062,6 @@ bool NewGVN::eliminateInstructions(Function &F) {
<< EliminationStack.dfs_back().first << ","
<< EliminationStack.dfs_back().second << ")\n");
}
if (Member && isa<Constant>(Member))
assert(isa<Constant>(CC->RepLeader));
DEBUG(dbgs() << "Current DFS numbers are (" << MemberDFSIn << ","
<< MemberDFSOut << ")\n");
@ -2037,11 +2102,8 @@ bool NewGVN::eliminateInstructions(Function &F) {
continue;
Value *Result = EliminationStack.back();
// Don't replace our existing users with ourselves, and don't replace
// phi node arguments with the result of the same phi node.
// IE tmp = phi(tmp11, undef); tmp11 = foo -> tmp = phi(tmp, undef)
if (MemberUse->get() == Result ||
(isa<PHINode>(Result) && MemberUse->getUser() == Result))
// Don't replace our existing users with ourselves.
if (MemberUse->get() == Result)
continue;
DEBUG(dbgs() << "Found replacement " << *Result << " for "


@ -511,9 +511,6 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
void visitSelectInst(SelectInst &I);
void visitBinaryOperator(Instruction &I);
void visitCmpInst(CmpInst &I);
void visitExtractElementInst(ExtractElementInst &I);
void visitInsertElementInst(InsertElementInst &I);
void visitShuffleVectorInst(ShuffleVectorInst &I);
void visitExtractValueInst(ExtractValueInst &EVI);
void visitInsertValueInst(InsertValueInst &IVI);
void visitLandingPadInst(LandingPadInst &I) { markAnythingOverdefined(&I); }
@ -970,21 +967,6 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
markOverdefined(&I);
}
void SCCPSolver::visitExtractElementInst(ExtractElementInst &I) {
// TODO : SCCP does not handle vectors properly.
return markOverdefined(&I);
}
void SCCPSolver::visitInsertElementInst(InsertElementInst &I) {
// TODO : SCCP does not handle vectors properly.
return markOverdefined(&I);
}
void SCCPSolver::visitShuffleVectorInst(ShuffleVectorInst &I) {
// TODO : SCCP does not handle vectors properly.
return markOverdefined(&I);
}
// Handle getelementptr instructions. If all operands are constants then we
// can turn this into a getelementptr ConstantExpr.
//


@ -67,12 +67,15 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
return true;
}
// When exporting, consult the index.
auto Summaries = ImportIndex.findGlobalValueSummaryList(SGV->getGUID());
assert(Summaries != ImportIndex.end() &&
"Missing summary for global value when exporting");
assert(Summaries->second.size() == 1 && "Local has more than one summary");
auto Linkage = Summaries->second.front()->linkage();
// When exporting, consult the index. We can have more than one local
// with the same GUID, in the case of same-named locals in different but
// same-named source files that were compiled in their respective directories
// (so the source file name and resulting GUID is the same). Find the one
// in this module.
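// (E.g., a "static" in dir1/f.c and another in dir2/f.c can hash to the
// same GUID when only the base source file name feeds the hash.)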
auto Summary = ImportIndex.findSummaryInModule(
SGV->getGUID(), SGV->getParent()->getModuleIdentifier());
assert(Summary && "Missing summary for global value when exporting");
auto Linkage = Summary->linkage();
if (!GlobalValue::isLocalLinkage(Linkage)) {
assert(!isNonRenamableLocal(*SGV) &&
"Attempting to promote non-renamable local");


@ -1189,19 +1189,11 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
Value *Ret = nullptr;
StringRef Name = Callee->getName();
if (Name == "fabs" && hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, false);
return optimizeUnaryDoubleFP(CI, B, false);
Value *Op = CI->getArgOperand(0);
if (Instruction *I = dyn_cast<Instruction>(Op)) {
// Fold fabs(x * x) -> x * x; any squared FP value must already be positive.
if (I->getOpcode() == Instruction::FMul)
if (I->getOperand(0) == I->getOperand(1))
return Op;
}
return Ret;
return nullptr;
}
Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {


@ -783,6 +783,10 @@ class InnerLoopVectorizer {
// Similarly, we create a new latch condition when setting up the structure
// of the new loop, so the old one can become dead.
SmallPtrSet<Instruction *, 4> DeadInstructions;
// Holds the end values for each induction variable. We save the end values
// so we can later fix-up the external users of the induction variables.
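// (The fix-up happens in vectorizeLoop() only after the dominator tree has
// been updated, since SCEVExpander relies on an up-to-date tree.)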
DenseMap<PHINode *, Value *> IVEndValues;
};
class InnerLoopUnroller : public InnerLoopVectorizer {
@ -1879,13 +1883,6 @@ class LoopVectorizationCostModel {
unsigned selectInterleaveCount(bool OptForSize, unsigned VF,
unsigned LoopCost);
/// \return The most profitable unroll factor.
/// This method finds the best unroll-factor based on register pressure and
/// other parameters. VF and LoopCost are the selected vectorization factor
/// and the cost of the selected VF.
unsigned computeInterleaveCount(bool OptForSize, unsigned VF,
unsigned LoopCost);
/// \brief A struct that represents some properties of the register usage
/// of a loop.
struct RegisterUsage {
@ -3424,7 +3421,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
// Create phi nodes to merge from the backedge-taken check block.
PHINode *BCResumeVal = PHINode::Create(
OrigPhi->getType(), 3, "bc.resume.val", ScalarPH->getTerminator());
Value *EndValue;
Value *&EndValue = IVEndValues[OrigPhi];
if (OrigPhi == OldInduction) {
// We know what the end value is.
EndValue = CountRoundDown;
@ -3443,9 +3440,6 @@ void InnerLoopVectorizer::createEmptyLoop() {
// or the value at the end of the vectorized loop.
BCResumeVal->addIncoming(EndValue, MiddleBlock);
// Fix up external users of the induction variable.
fixupIVUsers(OrigPhi, II, CountRoundDown, EndValue, MiddleBlock);
// Fix the scalar body counter (PHI node).
unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
@ -4116,11 +4110,23 @@ void InnerLoopVectorizer::vectorizeLoop() {
Phi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
} // end of for each Phi in PHIsToFix.
fixLCSSAPHIs();
// Make sure DomTree is updated.
// Update the dominator tree.
//
// FIXME: After creating the structure of the new loop, the dominator tree is
// no longer up-to-date, and it remains that way until we update it
// here. An out-of-date dominator tree is problematic for SCEV,
// because SCEVExpander uses it to guide code generation. The
// vectorizer uses SCEVExpanders in several places. Instead, we should
// keep the dominator tree up-to-date as we go.
updateAnalysis();
// Fix-up external users of the induction variables.
for (auto &Entry : *Legal->getInductionVars())
fixupIVUsers(Entry.first, Entry.second,
getOrCreateVectorTripCount(LI->getLoopFor(LoopVectorBody)),
IVEndValues[Entry.first], LoopMiddleBlock);
fixLCSSAPHIs();
predicateInstructions();
// Remove redundant induction instructions.


@ -651,7 +651,8 @@ class Expr : public Stmt {
/// constant.
bool EvaluateWithSubstitution(APValue &Value, ASTContext &Ctx,
const FunctionDecl *Callee,
ArrayRef<const Expr*> Args) const;
ArrayRef<const Expr*> Args,
const Expr *This = nullptr) const;
/// \brief If the current Expr is a pointer, this will try to statically
/// determine the number of bytes available where the pointer is pointing.

View File

@@ -140,12 +140,15 @@ class Argument<string name, bit optional, bit fake = 0> {
bit Fake = fake;
}
class BoolArgument<string name, bit opt = 0> : Argument<name, opt>;
class BoolArgument<string name, bit opt = 0, bit fake = 0> : Argument<name, opt,
fake>;
class IdentifierArgument<string name, bit opt = 0> : Argument<name, opt>;
class IntArgument<string name, bit opt = 0> : Argument<name, opt>;
class StringArgument<string name, bit opt = 0> : Argument<name, opt>;
class ExprArgument<string name, bit opt = 0> : Argument<name, opt>;
class FunctionArgument<string name, bit opt = 0> : Argument<name, opt>;
class FunctionArgument<string name, bit opt = 0, bit fake = 0> : Argument<name,
opt,
fake>;
class TypeArgument<string name, bit opt = 0> : Argument<name, opt>;
class UnsignedArgument<string name, bit opt = 0> : Argument<name, opt>;
class VariadicUnsignedArgument<string name> : Argument<name, 1>;
@@ -1591,6 +1594,26 @@ def Unavailable : InheritableAttr {
let Documentation = [Undocumented];
}
def DiagnoseIf : InheritableAttr {
let Spellings = [GNU<"diagnose_if">];
let Subjects = SubjectList<[Function]>;
let Args = [ExprArgument<"Cond">, StringArgument<"Message">,
EnumArgument<"DiagnosticType",
"DiagnosticType",
["error", "warning"],
["DT_Error", "DT_Warning"]>,
BoolArgument<"ArgDependent", 0, /*fake*/ 1>,
FunctionArgument<"Parent", 0, /*fake*/ 1>];
let DuplicatesAllowedWhileMerging = 1;
let LateParsed = 1;
let AdditionalMembers = [{
bool isError() const { return diagnosticType == DT_Error; }
bool isWarning() const { return diagnosticType == DT_Warning; }
}];
let TemplateDependent = 1;
let Documentation = [DiagnoseIfDocs];
}
def ArcWeakrefUnavailable : InheritableAttr {
let Spellings = [GNU<"objc_arc_weak_reference_unavailable">];
let Subjects = SubjectList<[ObjCInterface], ErrorDiag>;

View File

@@ -378,6 +378,65 @@ template instantiation, so the value for ``T::number`` is known.
}];
}
def DiagnoseIfDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
The ``diagnose_if`` attribute can be placed on function declarations to emit
warnings or errors at compile-time if calls to the attributed function meet
certain user-defined criteria. For example:
.. code-block:: c
void abs(int a)
__attribute__((diagnose_if(a >= 0, "Redundant abs call", "warning")));
void must_abs(int a)
__attribute__((diagnose_if(a >= 0, "Redundant abs call", "error")));
int val = abs(1); // warning: Redundant abs call
int val2 = must_abs(1); // error: Redundant abs call
int val3 = abs(val);
int val4 = must_abs(val); // Because run-time checks are not emitted for
// diagnose_if attributes, this executes without
// issue.
``diagnose_if`` is closely related to ``enable_if``, with a few key differences:
* Overload resolution is not aware of ``diagnose_if`` attributes: they're
considered only after we select the best candidate from a given candidate set.
* Function declarations that differ only in their ``diagnose_if`` attributes are
considered to be redeclarations of the same function (not overloads).
* If the condition provided to ``diagnose_if`` cannot be evaluated, no
diagnostic will be emitted.
Otherwise, ``diagnose_if`` is essentially the logical negation of ``enable_if``.
As a result of bullet number two, ``diagnose_if`` attributes will stack on the
same function. For example:
.. code-block:: c
int foo() __attribute__((diagnose_if(1, "diag1", "warning")));
int foo() __attribute__((diagnose_if(1, "diag2", "warning")));
int bar = foo(); // warning: diag1
// warning: diag2
int (*fooptr)(void) = foo; // warning: diag1
// warning: diag2
constexpr int supportsAPILevel(int N) { return N < 5; }
int baz(int a)
__attribute__((diagnose_if(!supportsAPILevel(10),
"Upgrade to API level 10 to use baz", "error")));
int baz(int a)
__attribute__((diagnose_if(!a, "0 is not recommended.", "warning")));
int (*bazptr)(int) = baz; // error: Upgrade to API level 10 to use baz
int v = baz(0); // error: Upgrade to API level 10 to use baz
Query for this feature with ``__has_attribute(diagnose_if)``.
}];
}
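As the documentation above suggests, availability of the attribute can be probed with __has_attribute. A portable guard might look like this sketch (the DIAGNOSE_WARN macro and checked_div function are illustrative, not part of the patch):

    #ifndef __has_attribute
    #define __has_attribute(x) 0 // compatibility with non-clang compilers
    #endif

    #if __has_attribute(diagnose_if)
    #define DIAGNOSE_WARN(cond, msg) __attribute__((diagnose_if(cond, msg, "warning")))
    #else
    #define DIAGNOSE_WARN(cond, msg)
    #endif

    int checked_div(int a, int b)
        DIAGNOSE_WARN(b == 0, "division by zero");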
def PassObjectSizeDocs : Documentation {
let Category = DocCatVariable; // Technically it's a parameter doc, but eh.
let Content = [{

View File

@@ -161,6 +161,8 @@ def ext_old_implicitly_unsigned_long_cxx : ExtWarn<
InGroup<CXX11Compat>;
def ext_clang_enable_if : Extension<"'enable_if' is a clang extension">,
InGroup<GccCompat>;
def ext_clang_diagnose_if : Extension<"'diagnose_if' is a clang extension">,
InGroup<GccCompat>;
// SEH
def err_seh_expected_handler : Error<

View File

@@ -495,6 +495,7 @@ def UnusedPropertyIvar : DiagGroup<"unused-property-ivar">;
def UnusedGetterReturnValue : DiagGroup<"unused-getter-return-value">;
def UsedButMarkedUnused : DiagGroup<"used-but-marked-unused">;
def UserDefinedLiterals : DiagGroup<"user-defined-literals">;
def UserDefinedWarnings : DiagGroup<"user-defined-warnings">;
def Reorder : DiagGroup<"reorder">;
def UndeclaredSelector : DiagGroup<"undeclared-selector">;
def ImplicitAtomic : DiagGroup<"implicit-atomic-properties">;
@@ -683,7 +684,8 @@ def Most : DiagGroup<"most", [
OverloadedVirtual,
PrivateExtern,
SelTypeCast,
ExternCCompat
ExternCCompat,
UserDefinedWarnings
]>;
// Thread Safety warnings

View File

@@ -2141,8 +2141,11 @@ def err_constexpr_local_var_no_init : Error<
def ext_constexpr_function_never_constant_expr : ExtWarn<
"constexpr %select{function|constructor}0 never produces a "
"constant expression">, InGroup<DiagGroup<"invalid-constexpr">>, DefaultError;
def err_enable_if_never_constant_expr : Error<
"'enable_if' attribute expression never produces a constant expression">;
def err_attr_cond_never_constant_expr : Error<
"%0 attribute expression never produces a constant expression">;
def err_diagnose_if_invalid_diagnostic_type : Error<
"invalid diagnostic type for 'diagnose_if'; use \"error\" or \"warning\" "
"instead">;
def err_constexpr_body_no_return : Error<
"no return statement in constexpr function">;
def err_constexpr_return_missing_expr : Error<
@@ -3333,6 +3336,9 @@ def note_ovl_candidate : Note<"candidate "
def note_ovl_candidate_inherited_constructor : Note<
"constructor from base class %0 inherited here">;
def note_ovl_candidate_inherited_constructor_slice : Note<
"constructor inherited from base class cannot be used to initialize from "
"an argument of the derived class type">;
def note_ovl_candidate_illegal_constructor : Note<
"candidate %select{constructor|template}0 ignored: "
"instantiation %select{takes|would take}0 its own class type by value">;
@@ -3366,7 +3372,9 @@ def note_ovl_candidate_disabled_by_enable_if : Note<
def note_ovl_candidate_has_pass_object_size_params: Note<
"candidate address cannot be taken because parameter %0 has "
"pass_object_size attribute">;
def note_ovl_candidate_disabled_by_enable_if_attr : Note<
def err_diagnose_if_succeeded : Error<"%0">;
def warn_diagnose_if_succeeded : Warning<"%0">, InGroup<UserDefinedWarnings>;
def note_ovl_candidate_disabled_by_function_cond_attr : Note<
"candidate disabled: %0">;
def note_ovl_candidate_disabled_by_extension : Note<
"candidate disabled due to OpenCL extension">;
@@ -4395,6 +4403,7 @@ def note_not_found_by_two_phase_lookup : Note<"%0 should be declared prior to th
def err_undeclared_use : Error<"use of undeclared %0">;
def warn_deprecated : Warning<"%0 is deprecated">,
InGroup<DeprecatedDeclarations>;
def note_from_diagnose_if : Note<"from 'diagnose_if' attribute on %0:">;
def warn_property_method_deprecated :
Warning<"property access is using %0 method which is deprecated">,
InGroup<DeprecatedDeclarations>;

View File

@@ -146,6 +146,7 @@ LANGOPT(Modules , 1, 0, "modules extension to C")
COMPATIBLE_LANGOPT(ModulesTS , 1, 0, "C++ Modules TS")
BENIGN_ENUM_LANGOPT(CompilingModule, CompilingModuleKind, 2, CMK_None,
"compiling a module interface")
BENIGN_LANGOPT(CompilingPCH, 1, 0, "building a pch")
COMPATIBLE_LANGOPT(ModulesDeclUse , 1, 0, "require declaration of module uses")
BENIGN_LANGOPT(ModulesSearchAll , 1, 1, "searching even non-imported modules to find unresolved references")
COMPATIBLE_LANGOPT(ModulesStrictDeclUse, 1, 0, "requiring declaration of module uses and all headers to be in modules")

View File

@@ -167,6 +167,9 @@ def disable_llvm_passes : Flag<["-"], "disable-llvm-passes">,
"frontend by not running any LLVM passes at all">;
def disable_llvm_optzns : Flag<["-"], "disable-llvm-optzns">,
Alias<disable_llvm_passes>;
def disable_lifetimemarkers : Flag<["-"], "disable-lifetime-markers">,
HelpText<"Disable lifetime-markers emission even when optimizations are "
"enabled">;
def disable_red_zone : Flag<["-"], "disable-red-zone">,
HelpText<"Do not emit code that uses the red zone.">;
def dwarf_column_info : Flag<["-"], "dwarf-column-info">,

View File

@@ -52,6 +52,7 @@ CODEGENOPT(DisableGCov , 1, 0) ///< Don't run the GCov pass, for testing.
CODEGENOPT(DisableLLVMPasses , 1, 0) ///< Don't run any LLVM IR passes to get
///< the pristine IR generated by the
///< frontend.
CODEGENOPT(DisableLifetimeMarkers, 1, 0) ///< Don't emit any lifetime markers
CODEGENOPT(ExperimentalNewPassManager, 1, 0) ///< Enables the new, experimental
///< pass manager.
CODEGENOPT(DisableRedZone , 1, 0) ///< Set when -mno-red-zone is enabled.

View File

@@ -88,6 +88,8 @@ class GeneratePCHAction : public ASTFrontendAction {
static std::unique_ptr<raw_pwrite_stream>
ComputeASTConsumerArguments(CompilerInstance &CI, StringRef InFile,
std::string &Sysroot, std::string &OutputFile);
bool BeginSourceFileAction(CompilerInstance &CI, StringRef Filename) override;
};
class GenerateModuleAction : public ASTFrontendAction {

View File

@@ -59,6 +59,13 @@ enum class SymbolLanguage {
CXX,
};
/// Language specific sub-kinds.
enum class SymbolSubKind {
None,
CXXCopyConstructor,
CXXMoveConstructor,
};
/// Set of properties that provide additional info about a symbol.
enum class SymbolProperty : uint8_t {
Generic = 1 << 0,
@@ -107,6 +114,7 @@ struct SymbolRelation {
struct SymbolInfo {
SymbolKind Kind;
SymbolSubKind SubKind;
SymbolPropertySet Properties;
SymbolLanguage Lang;
};
@@ -121,6 +129,7 @@ void printSymbolRoles(SymbolRoleSet Roles, raw_ostream &OS);
bool printSymbolName(const Decl *D, const LangOptions &LO, raw_ostream &OS);
StringRef getSymbolKindString(SymbolKind K);
StringRef getSymbolSubKindString(SymbolSubKind K);
StringRef getSymbolLanguageString(SymbolLanguage K);
void applyForEachSymbolProperty(SymbolPropertySet Props,
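A hedged illustration of what the two new sub-kinds distinguish (the type below is invented; the string forms come from getSymbolSubKindString later in this diff):

    struct S {
      S(const S &); // indexed as SymbolSubKind::CXXCopyConstructor ("cxx-copy-ctor")
      S(S &&);      // indexed as SymbolSubKind::CXXMoveConstructor ("cxx-move-ctor")
    };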

View File

@@ -215,14 +215,14 @@ class InitializedEntity {
/// \brief Create the initialization entity for a parameter.
static InitializedEntity InitializeParameter(ASTContext &Context,
ParmVarDecl *Parm) {
const ParmVarDecl *Parm) {
return InitializeParameter(Context, Parm, Parm->getType());
}
/// \brief Create the initialization entity for a parameter, but use
/// another type.
static InitializedEntity InitializeParameter(ASTContext &Context,
ParmVarDecl *Parm,
const ParmVarDecl *Parm,
QualType Type) {
bool Consumed = (Context.getLangOpts().ObjCAutoRefCount &&
Parm->hasAttr<NSConsumedAttr>());

View File

@@ -531,6 +531,13 @@ namespace clang {
Ambiguous.construct();
}
void setAsIdentityConversion(QualType T) {
setStandard();
Standard.setAsIdentityConversion();
Standard.setFromType(T);
Standard.setAllToTypes(T);
}
/// \brief Whether the target is really a std::initializer_list, and the
/// sequence only represents the worst element conversion.
bool isStdInitializerListElement() const {
@@ -601,8 +608,17 @@ namespace clang {
/// This candidate was not viable because its OpenCL extension is disabled.
ovl_fail_ext_disabled,
/// This inherited constructor is not viable because it would slice the
/// argument.
ovl_fail_inhctor_slice,
};
/// A list of implicit conversion sequences for the arguments of an
/// OverloadCandidate.
typedef llvm::MutableArrayRef<ImplicitConversionSequence>
ConversionSequenceList;
/// OverloadCandidate - A single candidate in an overload set (C++ 13.3).
struct OverloadCandidate {
/// Function - The actual function that this candidate
@@ -627,18 +643,13 @@ namespace clang {
/// is a surrogate, but only if IsSurrogate is true.
CXXConversionDecl *Surrogate;
/// Conversions - The conversion sequences used to convert the
/// function arguments to the function parameters, the pointer points to a
/// fixed size array with NumConversions elements. The memory is owned by
/// the OverloadCandidateSet.
ImplicitConversionSequence *Conversions;
/// The conversion sequences used to convert the function arguments
/// to the function parameters.
ConversionSequenceList Conversions;
/// The FixIt hints which can be used to fix the Bad candidate.
ConversionFixItGenerator Fix;
/// NumConversions - The number of elements in the Conversions array.
unsigned NumConversions;
/// Viable - True to indicate that this overload candidate is viable.
bool Viable;
@@ -664,6 +675,26 @@ namespace clang {
/// to be used while performing partial ordering of function templates.
unsigned ExplicitCallArguments;
/// The number of diagnose_if attributes that this overload triggered.
/// If any of the triggered attributes are errors, this won't count
/// diagnose_if warnings.
unsigned NumTriggeredDiagnoseIfs = 0;
/// Basically a TinyPtrVector<DiagnoseIfAttr *> that doesn't own the vector:
/// If NumTriggeredDiagnoseIfs is 0 or 1, this is a DiagnoseIfAttr *,
/// otherwise it's a pointer to an array of `NumTriggeredDiagnoseIfs`
/// DiagnoseIfAttr *s.
llvm::PointerUnion<DiagnoseIfAttr *, DiagnoseIfAttr **> DiagnoseIfInfo;
/// Gets an ArrayRef for the data at DiagnoseIfInfo. Note that this may give
/// you a pointer into DiagnoseIfInfo.
ArrayRef<DiagnoseIfAttr *> getDiagnoseIfInfo() const {
auto *Ptr = NumTriggeredDiagnoseIfs <= 1
? DiagnoseIfInfo.getAddrOfPtr1()
: DiagnoseIfInfo.get<DiagnoseIfAttr **>();
return {Ptr, NumTriggeredDiagnoseIfs};
}
union {
DeductionFailureInfo DeductionFailure;
@@ -677,9 +708,9 @@ namespace clang {
/// hasAmbiguousConversion - Returns whether this overload
/// candidate requires an ambiguous conversion or not.
bool hasAmbiguousConversion() const {
for (unsigned i = 0, e = NumConversions; i != e; ++i) {
if (!Conversions[i].isInitialized()) return false;
if (Conversions[i].isAmbiguous()) return true;
for (auto &C : Conversions) {
if (!C.isInitialized()) return false;
if (C.isAmbiguous()) return true;
}
return false;
}
@@ -728,17 +759,42 @@ namespace clang {
SmallVector<OverloadCandidate, 16> Candidates;
llvm::SmallPtrSet<Decl *, 16> Functions;
// Allocator for OverloadCandidate::Conversions. We store the first few
// elements inline to avoid allocation for small sets.
llvm::BumpPtrAllocator ConversionSequenceAllocator;
// Allocator for ConversionSequenceLists and DiagnoseIfAttr* arrays.
// We store the first few of each of these inline to avoid allocation for
// small sets.
llvm::BumpPtrAllocator SlabAllocator;
SourceLocation Loc;
CandidateSetKind Kind;
unsigned NumInlineSequences;
llvm::AlignedCharArray<alignof(ImplicitConversionSequence),
16 * sizeof(ImplicitConversionSequence)>
InlineSpace;
constexpr static unsigned NumInlineBytes =
24 * sizeof(ImplicitConversionSequence);
unsigned NumInlineBytesUsed;
llvm::AlignedCharArray<alignof(void *), NumInlineBytes> InlineSpace;
/// If we have space, allocates from inline storage. Otherwise, allocates
/// from the slab allocator.
/// FIXME: It would probably be nice to have a SmallBumpPtrAllocator
/// instead.
template <typename T>
T *slabAllocate(unsigned N) {
// It's simpler if this doesn't need to consider alignment.
static_assert(alignof(T) == alignof(void *),
"Only works for pointer-aligned types.");
static_assert(std::is_trivial<T>::value ||
std::is_same<ImplicitConversionSequence, T>::value,
"Add destruction logic to OverloadCandidateSet::clear().");
unsigned NBytes = sizeof(T) * N;
if (NBytes > NumInlineBytes - NumInlineBytesUsed)
return SlabAllocator.Allocate<T>(N);
char *FreeSpaceStart = InlineSpace.buffer + NumInlineBytesUsed;
assert(uintptr_t(FreeSpaceStart) % alignof(void *) == 0 &&
"Misaligned storage!");
NumInlineBytesUsed += NBytes;
return reinterpret_cast<T *>(FreeSpaceStart);
}
OverloadCandidateSet(const OverloadCandidateSet &) = delete;
void operator=(const OverloadCandidateSet &) = delete;
@@ -747,12 +803,17 @@ namespace clang {
public:
OverloadCandidateSet(SourceLocation Loc, CandidateSetKind CSK)
: Loc(Loc), Kind(CSK), NumInlineSequences(0) {}
: Loc(Loc), Kind(CSK), NumInlineBytesUsed(0) {}
~OverloadCandidateSet() { destroyCandidates(); }
SourceLocation getLocation() const { return Loc; }
CandidateSetKind getKind() const { return Kind; }
/// Make a DiagnoseIfAttr* array in a block of memory that will live for
/// as long as this OverloadCandidateSet. Returns a pointer to the start
/// of that array.
DiagnoseIfAttr **addDiagnoseIfComplaints(ArrayRef<DiagnoseIfAttr *> CA);
/// \brief Determine when this overload candidate will be new to the
/// overload set.
bool isNewCandidate(Decl *F) {
@@ -769,30 +830,32 @@ namespace clang {
size_t size() const { return Candidates.size(); }
bool empty() const { return Candidates.empty(); }
/// \brief Add a new candidate with NumConversions conversion sequence slots
/// to the overload set.
OverloadCandidate &addCandidate(unsigned NumConversions = 0) {
Candidates.push_back(OverloadCandidate());
OverloadCandidate &C = Candidates.back();
// Assign space from the inline array if there are enough free slots
// available.
if (NumConversions + NumInlineSequences <= 16) {
ImplicitConversionSequence *I =
(ImplicitConversionSequence *)InlineSpace.buffer;
C.Conversions = &I[NumInlineSequences];
NumInlineSequences += NumConversions;
} else {
// Otherwise get memory from the allocator.
C.Conversions = ConversionSequenceAllocator
.Allocate<ImplicitConversionSequence>(NumConversions);
}
/// \brief Allocate storage for conversion sequences for NumConversions
/// conversions.
ConversionSequenceList
allocateConversionSequences(unsigned NumConversions) {
ImplicitConversionSequence *Conversions =
slabAllocate<ImplicitConversionSequence>(NumConversions);
// Construct the new objects.
for (unsigned i = 0; i != NumConversions; ++i)
new (&C.Conversions[i]) ImplicitConversionSequence();
for (unsigned I = 0; I != NumConversions; ++I)
new (&Conversions[I]) ImplicitConversionSequence();
C.NumConversions = NumConversions;
return ConversionSequenceList(Conversions, NumConversions);
}
/// \brief Add a new candidate with NumConversions conversion sequence slots
/// to the overload set.
OverloadCandidate &addCandidate(unsigned NumConversions = 0,
ConversionSequenceList Conversions = None) {
assert((Conversions.empty() || Conversions.size() == NumConversions) &&
"preallocated conversion sequence has wrong length");
Candidates.push_back(OverloadCandidate());
OverloadCandidate &C = Candidates.back();
C.Conversions = Conversions.empty()
? allocateConversionSequences(NumConversions)
: Conversions;
return C;
}
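The slabAllocate helper above is a small-buffer optimization: allocations are served from fixed inline storage until it runs out, then spill to a slab allocator. A self-contained sketch of the same pattern under simplified assumptions (standard-library fallback instead of llvm::BumpPtrAllocator; trivially-destructible, pointer-aligned element types only):

    #include <cstddef>
    #include <memory>
    #include <vector>

    template <std::size_t NumInlineBytes> class InlineSlab {
      alignas(void *) char Inline[NumInlineBytes];
      std::size_t Used = 0;
      std::vector<std::unique_ptr<char[]>> Overflow; // heap fallback slabs

    public:
      // Callers placement-new objects into the returned memory, as the patch
      // does for ImplicitConversionSequence; nothing is freed individually.
      template <typename T> T *allocate(std::size_t N) {
        static_assert(alignof(T) <= alignof(void *), "pointer-aligned types only");
        std::size_t Bytes = sizeof(T) * N;
        if (Bytes <= NumInlineBytes - Used) {
          T *P = reinterpret_cast<T *>(Inline + Used);
          Used += Bytes;
          return P;
        }
        Overflow.push_back(std::make_unique<char[]>(Bytes));
        return reinterpret_cast<T *>(Overflow.back().get());
      }
    };

As in the patch, everything dies with the owning set at once, which is what makes the bump-pointer style safe.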

View File

@@ -27,6 +27,7 @@
#include "clang/AST/NSAPI.h"
#include "clang/AST/PrettyPrinter.h"
#include "clang/AST/TypeLoc.h"
#include "clang/AST/TypeOrdering.h"
#include "clang/Basic/ExpressionTraits.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/Module.h"
@@ -119,6 +120,7 @@ namespace clang {
class FunctionProtoType;
class FunctionTemplateDecl;
class ImplicitConversionSequence;
typedef MutableArrayRef<ImplicitConversionSequence> ConversionSequenceList;
class InitListExpr;
class InitializationKind;
class InitializationSequence;
@@ -806,6 +808,12 @@ class Sema {
/// run time.
Unevaluated,
/// \brief The current expression occurs within a braced-init-list within
/// an unevaluated operand. This is mostly like a regular unevaluated
/// context, except that we still instantiate constexpr functions that are
/// referenced here so that we can perform narrowing checks correctly.
UnevaluatedList,
/// \brief The current expression occurs within a discarded statement.
/// This behaves largely similarly to an unevaluated operand in preventing
/// definitions from being required, but not in other ways.
@@ -898,7 +906,8 @@ class Sema {
MangleNumberingContext &getMangleNumberingContext(ASTContext &Ctx);
bool isUnevaluated() const {
return Context == Unevaluated || Context == UnevaluatedAbstract;
return Context == Unevaluated || Context == UnevaluatedAbstract ||
Context == UnevaluatedList;
}
};
@@ -2510,10 +2519,11 @@ class Sema {
void AddOverloadCandidate(FunctionDecl *Function,
DeclAccessPair FoundDecl,
ArrayRef<Expr *> Args,
OverloadCandidateSet& CandidateSet,
OverloadCandidateSet &CandidateSet,
bool SuppressUserConversions = false,
bool PartialOverloading = false,
bool AllowExplicit = false);
bool AllowExplicit = false,
ConversionSequenceList EarlyConversions = None);
void AddFunctionCandidates(const UnresolvedSetImpl &Functions,
ArrayRef<Expr *> Args,
OverloadCandidateSet &CandidateSet,
@@ -2523,23 +2533,25 @@ class Sema {
void AddMethodCandidate(DeclAccessPair FoundDecl,
QualType ObjectType,
Expr::Classification ObjectClassification,
ArrayRef<Expr *> Args,
Expr *ThisArg, ArrayRef<Expr *> Args,
OverloadCandidateSet& CandidateSet,
bool SuppressUserConversion = false);
void AddMethodCandidate(CXXMethodDecl *Method,
DeclAccessPair FoundDecl,
CXXRecordDecl *ActingContext, QualType ObjectType,
Expr::Classification ObjectClassification,
ArrayRef<Expr *> Args,
Expr *ThisArg, ArrayRef<Expr *> Args,
OverloadCandidateSet& CandidateSet,
bool SuppressUserConversions = false,
bool PartialOverloading = false);
bool PartialOverloading = false,
ConversionSequenceList EarlyConversions = None);
void AddMethodTemplateCandidate(FunctionTemplateDecl *MethodTmpl,
DeclAccessPair FoundDecl,
CXXRecordDecl *ActingContext,
TemplateArgumentListInfo *ExplicitTemplateArgs,
QualType ObjectType,
Expr::Classification ObjectClassification,
Expr *ThisArg,
ArrayRef<Expr *> Args,
OverloadCandidateSet& CandidateSet,
bool SuppressUserConversions = false,
@@ -2551,6 +2563,16 @@ class Sema {
OverloadCandidateSet& CandidateSet,
bool SuppressUserConversions = false,
bool PartialOverloading = false);
bool CheckNonDependentConversions(FunctionTemplateDecl *FunctionTemplate,
ArrayRef<QualType> ParamTypes,
ArrayRef<Expr *> Args,
OverloadCandidateSet &CandidateSet,
ConversionSequenceList &Conversions,
bool SuppressUserConversions,
CXXRecordDecl *ActingContext = nullptr,
QualType ObjectType = QualType(),
Expr::Classification
ObjectClassification = {});
void AddConversionCandidate(CXXConversionDecl *Conversion,
DeclAccessPair FoundDecl,
CXXRecordDecl *ActingContext,
@@ -2603,6 +2625,38 @@ class Sema {
EnableIfAttr *CheckEnableIf(FunctionDecl *Function, ArrayRef<Expr *> Args,
bool MissingImplicitThis = false);
/// Check the diagnose_if attributes on the given function. Returns the
/// first successful fatal attribute, or null if calling Function(Args) isn't
/// an error.
///
/// This only considers ArgDependent DiagnoseIfAttrs.
///
/// This will populate Nonfatal with all non-error DiagnoseIfAttrs that
/// succeed. If this function returns non-null, the contents of Nonfatal are
/// unspecified.
DiagnoseIfAttr *
checkArgDependentDiagnoseIf(FunctionDecl *Function, ArrayRef<Expr *> Args,
SmallVectorImpl<DiagnoseIfAttr *> &Nonfatal,
bool MissingImplicitThis = false,
Expr *ThisArg = nullptr);
/// Check the diagnose_if expressions on the given function. Returns the
/// first successful fatal attribute, or null if using Function isn't
/// an error.
///
/// This ignores all ArgDependent DiagnoseIfAttrs.
///
/// This will populate Nonfatal with all non-error DiagnoseIfAttrs that
/// succeed. If this function returns non-null, the contents of Nonfatal are
/// unspecified.
DiagnoseIfAttr *
checkArgIndependentDiagnoseIf(FunctionDecl *Function,
SmallVectorImpl<DiagnoseIfAttr *> &Nonfatal);
/// Emits the diagnostic contained in the given DiagnoseIfAttr at Loc. Also
/// emits a note about the location of said attribute.
void emitDiagnoseIfDiagnostic(SourceLocation Loc, const DiagnoseIfAttr *DIA);
/// Returns whether the given function's address can be taken or not,
/// optionally emitting a diagnostic if the address can't be taken.
///
@@ -3801,6 +3855,9 @@ class Sema {
/// variable will have in the given scope.
QualType getCapturedDeclRefType(VarDecl *Var, SourceLocation Loc);
/// Mark all of the declarations referenced within a particular AST node as
/// referenced. Used when template instantiation instantiates a non-dependent
/// type -- entities referenced by the type are now referenced.
void MarkDeclarationsReferencedInType(SourceLocation Loc, QualType T);
void MarkDeclarationsReferencedInExpr(Expr *E,
bool SkipLocalVariables = false);
@@ -6580,6 +6637,8 @@ class Sema {
/// \brief The explicitly-specified template arguments were not valid
/// template arguments for the given template.
TDK_InvalidExplicitArguments,
/// \brief Checking non-dependent argument conversions failed.
TDK_NonDependentConversionFailure,
/// \brief Deduction failed; that's all we know.
TDK_MiscellaneousDeductionFailure,
/// \brief CUDA Target attributes do not match.
@@ -6618,22 +6677,21 @@ class Sema {
QualType OriginalArgType;
};
TemplateDeductionResult
FinishTemplateArgumentDeduction(FunctionTemplateDecl *FunctionTemplate,
SmallVectorImpl<DeducedTemplateArgument> &Deduced,
unsigned NumExplicitlySpecified,
FunctionDecl *&Specialization,
sema::TemplateDeductionInfo &Info,
SmallVectorImpl<OriginalCallArg> const *OriginalCallArgs = nullptr,
bool PartialOverloading = false);
TemplateDeductionResult FinishTemplateArgumentDeduction(
FunctionTemplateDecl *FunctionTemplate,
SmallVectorImpl<DeducedTemplateArgument> &Deduced,
unsigned NumExplicitlySpecified, FunctionDecl *&Specialization,
sema::TemplateDeductionInfo &Info,
SmallVectorImpl<OriginalCallArg> const *OriginalCallArgs = nullptr,
bool PartialOverloading = false,
llvm::function_ref<bool()> CheckNonDependent = []{ return false; });
TemplateDeductionResult
DeduceTemplateArguments(FunctionTemplateDecl *FunctionTemplate,
TemplateArgumentListInfo *ExplicitTemplateArgs,
ArrayRef<Expr *> Args,
FunctionDecl *&Specialization,
sema::TemplateDeductionInfo &Info,
bool PartialOverloading = false);
TemplateDeductionResult DeduceTemplateArguments(
FunctionTemplateDecl *FunctionTemplate,
TemplateArgumentListInfo *ExplicitTemplateArgs, ArrayRef<Expr *> Args,
FunctionDecl *&Specialization, sema::TemplateDeductionInfo &Info,
bool PartialOverloading,
llvm::function_ref<bool(ArrayRef<QualType>)> CheckNonDependent);
TemplateDeductionResult
DeduceTemplateArguments(FunctionTemplateDecl *FunctionTemplate,
@@ -6877,6 +6935,10 @@ class Sema {
/// Specializations whose definitions are currently being instantiated.
llvm::DenseSet<std::pair<Decl *, unsigned>> InstantiatingSpecializations;
/// Non-dependent types used in templates that have already been instantiated
/// by some template instantiation.
llvm::DenseSet<QualType> InstantiatedNonDependentTypes;
/// \brief Extra modules inspected when performing a lookup during a template
/// instantiation. Computed lazily.
SmallVector<Module*, 16> ActiveTemplateInstantiationLookupModules;
@@ -10186,6 +10248,22 @@ class EnterExpressionEvaluationContext {
IsDecltype);
}
enum InitListTag { InitList };
EnterExpressionEvaluationContext(Sema &Actions, InitListTag,
bool ShouldEnter = true)
: Actions(Actions), Entered(false) {
// In C++11 onwards, narrowing checks are performed on the contents of
// braced-init-lists, even when they occur within unevaluated operands.
// Therefore we still need to instantiate constexpr functions used in such
// a context.
if (ShouldEnter && Actions.isUnevaluatedContext() &&
Actions.getLangOpts().CPlusPlus11) {
Actions.PushExpressionEvaluationContext(Sema::UnevaluatedList, nullptr,
false);
Entered = true;
}
}
~EnterExpressionEvaluationContext() {
if (Entered)
Actions.PopExpressionEvaluationContext();
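The reason for the dedicated UnevaluatedList context above is that C++11 narrowing checks apply inside a braced-init-list even when it appears in an unevaluated operand, so constexpr functions referenced there must still be instantiated. A hedged example of the behavior (not from the patch):

    // char{257} is ill-formed (narrowing) even inside sizeof, which is an
    // unevaluated operand; char{42} fits and is fine.
    constexpr auto bad = sizeof(char{257}); // error: narrowing conversion
    constexpr auto ok  = sizeof(char{42});  // OK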

View File

@@ -278,6 +278,14 @@ def VirtualCallChecker : Checker<"VirtualCall">,
} // end: "optin.cplusplus"
let ParentPackage = CplusplusAlpha in {
def IteratorPastEndChecker : Checker<"IteratorPastEnd">,
HelpText<"Check iterators used past end">,
DescFile<"IteratorPastEndChecker.cpp">;
} // end: "alpha.cplusplus"
//===----------------------------------------------------------------------===//
// Valist checkers.

View File

@@ -4543,6 +4543,12 @@ class ExprEvaluatorBase
Call.getLValueBase().dyn_cast<const ValueDecl*>());
if (!FD)
return Error(Callee);
// Don't call function pointers which have been cast to some other type.
// Per DR (no number yet), the caller and callee can differ in noexcept.
if (!Info.Ctx.hasSameFunctionTypeIgnoringExceptionSpec(
CalleeType->getPointeeType(), FD->getType())) {
return Error(E);
}
// Overloaded operator calls to member functions are represented as normal
// calls with '*this' as the first argument.
@@ -4558,14 +4564,42 @@ class ExprEvaluatorBase
return false;
This = &ThisVal;
Args = Args.slice(1);
} else if (MD && MD->isLambdaStaticInvoker()) {
// Map the static invoker for the lambda back to the call operator.
// Conveniently, we don't have to slice out the 'this' argument (as is
// being done for the non-static case), since a static member function
// doesn't have an implicit argument passed in.
const CXXRecordDecl *ClosureClass = MD->getParent();
assert(
ClosureClass->captures_begin() == ClosureClass->captures_end() &&
"Number of captures must be zero for conversion to function-ptr");
const CXXMethodDecl *LambdaCallOp =
ClosureClass->getLambdaCallOperator();
// Set 'FD', the function that will be called below, to the call
// operator. If the closure object represents a generic lambda, find
// the corresponding specialization of the call operator.
if (ClosureClass->isGenericLambda()) {
assert(MD->isFunctionTemplateSpecialization() &&
"A generic lambda's static-invoker function must be a "
"template specialization");
const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs();
FunctionTemplateDecl *CallOpTemplate =
LambdaCallOp->getDescribedFunctionTemplate();
void *InsertPos = nullptr;
FunctionDecl *CorrespondingCallOpSpecialization =
CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos);
assert(CorrespondingCallOpSpecialization &&
"We must always have a function call operator specialization "
"that corresponds to our static invoker specialization");
FD = cast<CXXMethodDecl>(CorrespondingCallOpSpecialization);
} else
FD = LambdaCallOp;
}
// Don't call function pointers which have been cast to some other type.
// Per DR (no number yet), the caller and callee can differ in noexcept.
if (!Info.Ctx.hasSameFunctionTypeIgnoringExceptionSpec(
CalleeType->getPointeeType(), FD->getType())) {
return Error(E);
}
} else
return Error(E);
@@ -5834,6 +5868,7 @@ namespace {
bool VisitCXXConstructExpr(const CXXConstructExpr *E) {
return VisitCXXConstructExpr(E, E->getType());
}
bool VisitLambdaExpr(const LambdaExpr *E);
bool VisitCXXInheritedCtorInitExpr(const CXXInheritedCtorInitExpr *E);
bool VisitCXXConstructExpr(const CXXConstructExpr *E, QualType T);
bool VisitCXXStdInitializerListExpr(const CXXStdInitializerListExpr *E);
@@ -6168,6 +6203,21 @@ bool RecordExprEvaluator::VisitCXXStdInitializerListExpr(
return true;
}
bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) {
const CXXRecordDecl *ClosureClass = E->getLambdaClass();
if (ClosureClass->isInvalidDecl()) return false;
if (Info.checkingPotentialConstantExpression()) return true;
if (E->capture_size()) {
Info.FFDiag(E, diag::note_unimplemented_constexpr_lambda_feature_ast)
<< "can not evaluate lambda expressions with captures";
return false;
}
// FIXME: Implement captures.
Result = APValue(APValue::UninitStruct(), /*NumBases*/0, /*NumFields*/0);
return true;
}
static bool EvaluateRecord(const Expr *E, const LValue &This,
APValue &Result, EvalInfo &Info) {
assert(E->isRValue() && E->getType()->isRecordType() &&
@@ -6217,6 +6267,9 @@ class TemporaryExprEvaluator
bool VisitCXXStdInitializerListExpr(const CXXStdInitializerListExpr *E) {
return VisitConstructExpr(E);
}
bool VisitLambdaExpr(const LambdaExpr *E) {
return VisitConstructExpr(E);
}
};
} // end anonymous namespace
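A hedged sketch of what the static-invoker mapping above makes evaluable at compile time (assumes C++17 constexpr lambdas; the example is not from the patch):

    // Converting a capture-less lambda to a function pointer yields its
    // "static invoker"; the evaluator maps a call through that pointer back
    // to the lambda's call operator.
    constexpr int (*twice)(int) = [](int x) { return x * 2; };
    static_assert(twice(21) == 42, "evaluated through the static invoker");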
@@ -10357,10 +10410,25 @@ bool Expr::isCXX11ConstantExpr(const ASTContext &Ctx, APValue *Result,
bool Expr::EvaluateWithSubstitution(APValue &Value, ASTContext &Ctx,
const FunctionDecl *Callee,
ArrayRef<const Expr*> Args) const {
ArrayRef<const Expr*> Args,
const Expr *This) const {
Expr::EvalStatus Status;
EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantExpressionUnevaluated);
LValue ThisVal;
const LValue *ThisPtr = nullptr;
if (This) {
#ifndef NDEBUG
auto *MD = dyn_cast<CXXMethodDecl>(Callee);
assert(MD && "Don't provide `this` for non-methods.");
assert(!MD->isStatic() && "Don't provide `this` for static methods.");
#endif
if (EvaluateObjectArgument(Info, This, ThisVal))
ThisPtr = &ThisVal;
if (Info.EvalStatus.HasSideEffects)
return false;
}
ArgVector ArgValues(Args.size());
for (ArrayRef<const Expr*>::iterator I = Args.begin(), E = Args.end();
I != E; ++I) {
@@ -10373,7 +10441,7 @@ bool Expr::EvaluateWithSubstitution(APValue &Value, ASTContext &Ctx,
}
// Build fake call to Callee.
CallStackFrame Frame(Info, Callee->getLocation(), Callee, /*This*/nullptr,
CallStackFrame Frame(Info, Callee->getLocation(), Callee, ThisPtr,
ArgValues.data());
return Evaluate(Value, Info, this) && !Info.EvalStatus.HasSideEffects;
}

View File

@@ -109,13 +109,13 @@ static const DeclContext *getEffectiveParentContext(const DeclContext *DC) {
static const FunctionDecl *getStructor(const NamedDecl *ND) {
if (const auto *FTD = dyn_cast<FunctionTemplateDecl>(ND))
return FTD->getTemplatedDecl();
return FTD->getTemplatedDecl()->getCanonicalDecl();
const auto *FD = cast<FunctionDecl>(ND);
if (const auto *FTD = FD->getPrimaryTemplate())
return FTD->getTemplatedDecl();
return FTD->getTemplatedDecl()->getCanonicalDecl();
return FD;
return FD->getCanonicalDecl();
}
/// MicrosoftMangleContextImpl - Overrides the default MangleContext for the
@@ -312,6 +312,10 @@ class MicrosoftCXXNameMangler {
void mangleNestedName(const NamedDecl *ND);
private:
bool isStructorDecl(const NamedDecl *ND) const {
return ND == Structor || getStructor(ND) == Structor;
}
void mangleUnqualifiedName(const NamedDecl *ND) {
mangleUnqualifiedName(ND, ND->getDeclName());
}
@@ -898,7 +902,7 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
llvm_unreachable("Can't mangle Objective-C selector names here!");
case DeclarationName::CXXConstructorName:
if (Structor == getStructor(ND)) {
if (isStructorDecl(ND)) {
if (StructorType == Ctor_CopyingClosure) {
Out << "?_O";
return;
@@ -912,7 +916,7 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
return;
case DeclarationName::CXXDestructorName:
if (ND == Structor)
if (isStructorDecl(ND))
// If the named decl is the C++ destructor we're mangling,
// use the type we were given.
mangleCXXDtorType(static_cast<CXXDtorType>(StructorType));
@@ -1862,7 +1866,7 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T,
IsStructor = true;
IsCtorClosure = (StructorType == Ctor_CopyingClosure ||
StructorType == Ctor_DefaultClosure) &&
getStructor(MD) == Structor;
isStructorDecl(MD);
if (IsCtorClosure)
CC = getASTContext().getDefaultCallingConvention(
/*IsVariadic=*/false, /*IsCXXMethod=*/true);
@@ -1883,7 +1887,7 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T,
// <return-type> ::= <type>
// ::= @ # structors (they have no declared return type)
if (IsStructor) {
if (isa<CXXDestructorDecl>(D) && D == Structor &&
if (isa<CXXDestructorDecl>(D) && isStructorDecl(D) &&
StructorType == Dtor_Deleting) {
// The scalar deleting destructor takes an extra int argument.
// However, the FunctionType generated has 0 arguments.

View File

@@ -312,7 +312,8 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
// At O0 and O1 we only run the always inliner which is more efficient. At
// higher optimization levels we run the normal inliner.
if (CodeGenOpts.OptimizationLevel <= 1) {
bool InsertLifetimeIntrinsics = CodeGenOpts.OptimizationLevel != 0;
bool InsertLifetimeIntrinsics = (CodeGenOpts.OptimizationLevel != 0 &&
!CodeGenOpts.DisableLifetimeMarkers);
PMBuilder.Inliner = createAlwaysInlinerLegacyPass(InsertLifetimeIntrinsics);
} else {
PMBuilder.Inliner = createFunctionInliningPass(
@@ -519,11 +520,22 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
.Case("dynamic-no-pic", llvm::Reloc::DynamicNoPIC);
assert(RM.hasValue() && "invalid PIC model!");
CodeGenOpt::Level OptLevel = CodeGenOpt::Default;
CodeGenOpt::Level OptLevel;
switch (CodeGenOpts.OptimizationLevel) {
default: break;
case 0: OptLevel = CodeGenOpt::None; break;
case 3: OptLevel = CodeGenOpt::Aggressive; break;
default:
llvm_unreachable("Invalid optimization level!");
case 0:
OptLevel = CodeGenOpt::None;
break;
case 1:
OptLevel = CodeGenOpt::Less;
break;
case 2:
OptLevel = CodeGenOpt::Default;
break; // O2/Os/Oz
case 3:
OptLevel = CodeGenOpt::Aggressive;
break;
}
llvm::TargetOptions Options;
@@ -849,21 +861,8 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
}
}
static void runThinLTOBackend(const CodeGenOptions &CGOpts, Module *M,
static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
std::unique_ptr<raw_pwrite_stream> OS) {
// If we are performing a ThinLTO importing compile, load the function index
// into memory and pass it into thinBackend, which will run the function
// importer and invoke LTO passes.
Expected<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr =
llvm::getModuleSummaryIndexForFile(CGOpts.ThinLTOIndexFile);
if (!IndexOrErr) {
logAllUnhandledErrors(IndexOrErr.takeError(), errs(),
"Error loading index file '" +
CGOpts.ThinLTOIndexFile + "': ");
return;
}
std::unique_ptr<ModuleSummaryIndex> CombinedIndex = std::move(*IndexOrErr);
StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>>
ModuleToDefinedGVSummaries;
CombinedIndex->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
@@ -949,8 +948,26 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
BackendAction Action,
std::unique_ptr<raw_pwrite_stream> OS) {
if (!CGOpts.ThinLTOIndexFile.empty()) {
runThinLTOBackend(CGOpts, M, std::move(OS));
return;
// If we are performing a ThinLTO importing compile, load the function index
// into memory and pass it into runThinLTOBackend, which will run the
// function importer and invoke LTO passes.
Expected<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr =
llvm::getModuleSummaryIndexForFile(CGOpts.ThinLTOIndexFile);
if (!IndexOrErr) {
logAllUnhandledErrors(IndexOrErr.takeError(), errs(),
"Error loading index file '" +
CGOpts.ThinLTOIndexFile + "': ");
return;
}
std::unique_ptr<ModuleSummaryIndex> CombinedIndex = std::move(*IndexOrErr);
// A null CombinedIndex means we should skip ThinLTO compilation
// (LLVM will optionally ignore empty index files, returning null instead
// of an error).
bool DoThinLTOBackend = CombinedIndex != nullptr;
if (DoThinLTOBackend) {
runThinLTOBackend(CombinedIndex.get(), M, std::move(OS));
return;
}
}
EmitAssemblyHelper AsmHelper(Diags, HeaderOpts, CGOpts, TOpts, LOpts, M);

View File

@@ -616,6 +616,8 @@ struct EHPersonality {
static const EHPersonality GNU_C_SJLJ;
static const EHPersonality GNU_C_SEH;
static const EHPersonality GNU_ObjC;
static const EHPersonality GNU_ObjC_SJLJ;
static const EHPersonality GNU_ObjC_SEH;
static const EHPersonality GNUstep_ObjC;
static const EHPersonality GNU_ObjCXX;
static const EHPersonality NeXT_ObjC;

View File

@@ -97,6 +97,10 @@ EHPersonality::GNU_CPlusPlus_SEH = { "__gxx_personality_seh0", nullptr };
const EHPersonality
EHPersonality::GNU_ObjC = {"__gnu_objc_personality_v0", "objc_exception_throw"};
const EHPersonality
EHPersonality::GNU_ObjC_SJLJ = {"__gnu_objc_personality_sj0", "objc_exception_throw"};
const EHPersonality
EHPersonality::GNU_ObjC_SEH = {"__gnu_objc_personality_seh0", "objc_exception_throw"};
const EHPersonality
EHPersonality::GNU_ObjCXX = { "__gnustep_objcxx_personality_v0", nullptr };
const EHPersonality
EHPersonality::GNUstep_ObjC = { "__gnustep_objc_personality_v0", nullptr };
@@ -137,6 +141,10 @@ static const EHPersonality &getObjCPersonality(const llvm::Triple &T,
// fallthrough
case ObjCRuntime::GCC:
case ObjCRuntime::ObjFW:
if (L.SjLjExceptions)
return EHPersonality::GNU_ObjC_SJLJ;
else if (useLibGCCSEHPersonality(T))
return EHPersonality::GNU_ObjC_SEH;
return EHPersonality::GNU_ObjC;
}
llvm_unreachable("bad runtime kind");

View File

@@ -42,6 +42,9 @@ using namespace CodeGen;
/// markers.
static bool shouldEmitLifetimeMarkers(const CodeGenOptions &CGOpts,
const LangOptions &LangOpts) {
if (CGOpts.DisableLifetimeMarkers)
return false;
// Asan uses markers for use-after-scope checks.
if (CGOpts.SanitizeAddressUseAfterScope)
return true;

View File

@@ -3812,6 +3812,7 @@ ToolChain::CXXStdlibType NetBSD::GetDefaultCXXStdlibType() const {
if (Major >= 7 || Major == 0) {
switch (getArch()) {
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:

View File

@@ -9644,6 +9644,7 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
if (Major >= 7 || Major == 0) {
switch (getToolChain().getArch()) {
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:

View File

@@ -1282,9 +1282,7 @@ class AnnotatingParser {
return TT_UnaryOperator;
const FormatToken *NextToken = Tok.getNextNonComment();
if (!NextToken ||
NextToken->isOneOf(tok::arrow, Keywords.kw_final, tok::equal,
Keywords.kw_override) ||
if (!NextToken || NextToken->isOneOf(tok::arrow, tok::equal) ||
(NextToken->is(tok::l_brace) && !NextToken->getNextNonComment()))
return TT_PointerOrReference;
@@ -2088,9 +2086,9 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
!Line.IsMultiVariableDeclStmt)))
return true;
if (Left.is(TT_PointerOrReference))
return Right.Tok.isLiteral() ||
Right.isOneOf(TT_BlockComment, Keywords.kw_final,
Keywords.kw_override) ||
return Right.Tok.isLiteral() || Right.is(TT_BlockComment) ||
(Right.isOneOf(Keywords.kw_override, Keywords.kw_final) &&
!Right.is(TT_StartOfName)) ||
(Right.is(tok::l_brace) && Right.BlockKind == BK_Block) ||
(!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
tok::l_paren) &&
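For context, a hedged pair of cases the annotator change above must tell apart ('final' and 'override' are contextual keywords, so after a pointer they may simply be names being declared; the example code is invented):

    struct Base { virtual void f(); };
    struct Derived : Base { void f() override; }; // keyword position
    int *final; // identifier position: a variable named 'final'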

View File

@@ -737,7 +737,7 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() {
return;
}
if (Next->is(tok::exclaim) && PreviousMustBeValue)
addUnwrappedLine();
return addUnwrappedLine();
bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
bool NextEndsTemplateExpr =
Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
@@ -745,9 +745,10 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() {
(PreviousMustBeValue ||
Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
tok::minusminus)))
addUnwrappedLine();
if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
addUnwrappedLine();
return addUnwrappedLine();
if ((PreviousMustBeValue || Previous->is(tok::r_brace)) &&
isJSDeclOrStmt(Keywords, Next))
return addUnwrappedLine();
}
void UnwrappedLineParser::parseStructuralElement() {
@@ -1974,7 +1975,14 @@ void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
!FormatTok->isStringLiteral())
return;
while (!eof() && FormatTok->isNot(tok::semi)) {
while (!eof()) {
if (FormatTok->is(tok::semi))
return;
if (Line->Tokens.size() == 0) {
// Common issue: Automatic Semicolon Insertion wrapped the line, so the
// import statement should terminate.
return;
}
if (FormatTok->is(tok::l_brace)) {
FormatTok->BlockKind = BK_Block;
parseBracedList();

View File

@@ -520,6 +520,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
Opts.EmitLLVMUseLists = A->getOption().getID() == OPT_emit_llvm_uselists;
Opts.DisableLLVMPasses = Args.hasArg(OPT_disable_llvm_passes);
Opts.DisableLifetimeMarkers = Args.hasArg(OPT_disable_lifetimemarkers);
Opts.DisableRedZone = Args.hasArg(OPT_disable_red_zone);
Opts.ForbidGuardVariables = Args.hasArg(OPT_fforbid_guard_variables);
Opts.UseRegisterSizedBitfieldAccess = Args.hasArg(

View File

@@ -127,6 +127,12 @@ GeneratePCHAction::ComputeASTConsumerArguments(CompilerInstance &CI,
return OS;
}
bool GeneratePCHAction::BeginSourceFileAction(CompilerInstance &CI,
StringRef Filename) {
CI.getLangOpts().CompilingPCH = true;
return true;
}
std::unique_ptr<ASTConsumer>
GenerateModuleAction::CreateASTConsumer(CompilerInstance &CI,
StringRef InFile) {

View File

@@ -53,6 +53,7 @@ SymbolInfo index::getSymbolInfo(const Decl *D) {
assert(D);
SymbolInfo Info;
Info.Kind = SymbolKind::Unknown;
Info.SubKind = SymbolSubKind::None;
Info.Properties = SymbolPropertySet();
Info.Lang = SymbolLanguage::C;
@@ -183,10 +184,16 @@ SymbolInfo index::getSymbolInfo(const Decl *D) {
Info.Kind = SymbolKind::NamespaceAlias;
Info.Lang = SymbolLanguage::CXX;
break;
case Decl::CXXConstructor:
case Decl::CXXConstructor: {
Info.Kind = SymbolKind::Constructor;
Info.Lang = SymbolLanguage::CXX;
auto *CD = cast<CXXConstructorDecl>(D);
if (CD->isCopyConstructor())
Info.SubKind = SymbolSubKind::CXXCopyConstructor;
else if (CD->isMoveConstructor())
Info.SubKind = SymbolSubKind::CXXMoveConstructor;
break;
}
case Decl::CXXDestructor:
Info.Kind = SymbolKind::Destructor;
Info.Lang = SymbolLanguage::CXX;
@@ -363,6 +370,15 @@ StringRef index::getSymbolKindString(SymbolKind K) {
llvm_unreachable("invalid symbol kind");
}
StringRef index::getSymbolSubKindString(SymbolSubKind K) {
switch (K) {
case SymbolSubKind::None: return "<none>";
case SymbolSubKind::CXXCopyConstructor: return "cxx-copy-ctor";
case SymbolSubKind::CXXMoveConstructor: return "cxx-move-ctor";
}
llvm_unreachable("invalid symbol subkind");
}
StringRef index::getSymbolLanguageString(SymbolLanguage K) {
switch (K) {
case SymbolLanguage::C: return "C";

View File

@@ -1996,10 +1996,12 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
// Ask HeaderInfo if we should enter this #include file. If not, #including
// this file will have no effect.
bool SkipHeader = false;
if (ShouldEnter &&
!HeaderInfo.ShouldEnterIncludeFile(*this, File, isImport,
SuggestedModule.getModule())) {
ShouldEnter = false;
SkipHeader = true;
if (Callbacks)
Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
}
@@ -2008,6 +2010,14 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
if (!ShouldEnter) {
// If this is a module import, make it visible if needed.
if (auto *M = SuggestedModule.getModule()) {
// When building a pch, -fmodule-name tells the compiler to textually
// include headers in the specified module. But it is possible that
// ShouldEnter is false because we are skipping the header. In that
// case, we are not importing the specified module.
if (SkipHeader && getLangOpts().CompilingPCH &&
M->getTopLevelModuleName() == getLangOpts().CurrentModule)
return;
makeModuleVisible(M, HashLoc);
if (IncludeTok.getIdentifierInfo()->getPPKeywordID() !=
@@ -2032,6 +2042,13 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
// Determine if we're switching to building a new submodule, and which one.
if (auto *M = SuggestedModule.getModule()) {
// When building a pch, -fmodule-name tells the compiler to textually
// include headers in the specified module. We are not building the
// specified module.
if (getLangOpts().CompilingPCH &&
M->getTopLevelModuleName() == getLangOpts().CurrentModule)
return;
assert(!CurSubmodule && "should not have marked this as a module yet");
CurSubmodule = M;

View File

@@ -306,10 +306,11 @@ unsigned Parser::ParseAttributeArgsCommon(
// Parse the non-empty comma-separated list of expressions.
do {
bool ShouldEnter = attributeParsedArgsUnevaluated(*AttrName);
bool Uneval = attributeParsedArgsUnevaluated(*AttrName);
EnterExpressionEvaluationContext Unevaluated(
Actions, Sema::Unevaluated, /*LambdaContextDecl=*/nullptr,
/*IsDecltype=*/false, ShouldEnter);
Actions, Uneval ? Sema::Unevaluated : Sema::ConstantEvaluated,
/*LambdaContextDecl=*/nullptr,
/*IsDecltype=*/false);
ExprResult ArgExpr(
Actions.CorrectDelayedTyposInExpr(ParseAssignmentExpression()));

View File

@@ -404,6 +404,10 @@ ExprResult Parser::ParseBraceInitializer() {
return Actions.ActOnInitList(LBraceLoc, None, ConsumeBrace());
}
// Enter an appropriate expression evaluation context for an initializer list.
EnterExpressionEvaluationContext EnterContext(
Actions, EnterExpressionEvaluationContext::InitList);
bool InitExprsOk = true;
while (1) {

View File

@@ -1242,7 +1242,8 @@ bool Sema::CheckNeonBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
QualType RHSTy = RHS.get()->getType();
llvm::Triple::ArchType Arch = Context.getTargetInfo().getTriple().getArch();
bool IsPolyUnsigned = Arch == llvm::Triple::aarch64;
bool IsPolyUnsigned = Arch == llvm::Triple::aarch64 ||
Arch == llvm::Triple::aarch64_be;
bool IsInt64Long =
Context.getTargetInfo().getInt64Type() == TargetInfo::SignedLong;
QualType EltTy =

Some files were not shown because too many files have changed in this diff.