Merge llvm, clang, lld and lldb trunk r291012, and resolve conflicts.

Dimitry Andric 2017-01-04 22:19:42 +00:00
commit 8e0f8b8c96
130 changed files with 2367 additions and 1719 deletions

View File

@ -21,8 +21,8 @@
// class MyClass : public RefCountedBase<MyClass> {};
//
// void foo() {
// // Objects that inherit from RefCountedBase should always be instantiated
// // on the heap, never on the stack.
// // Constructing an IntrusiveRefCntPtr increases the pointee's refcount by
// // 1 (from 0 in this case).
// IntrusiveRefCntPtr<MyClass> Ptr1(new MyClass());
//
// // Copying an IntrusiveRefCntPtr increases the pointee's refcount by 1.
@ -68,9 +68,6 @@ namespace llvm {
/// calls to Release() and Retain(), which increment and decrement the object's
/// refcount, respectively. When a Release() call decrements the refcount to 0,
/// the object deletes itself.
///
/// Objects that inherit from RefCountedBase should always be allocated with
/// operator new.
template <class Derived> class RefCountedBase {
mutable unsigned RefCount = 0;
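As context for the comment change above: the heap-allocation guidance now lives in the usage example rather than the class doxygen. A minimal sketch of that pattern, reusing the MyClass name from the header's own example:

#include "llvm/ADT/IntrusiveRefCntPtr.h"

class MyClass : public llvm::RefCountedBase<MyClass> {};

void foo() {
  // Refcount goes from 0 to 1; the object lives on the heap, never the stack.
  llvm::IntrusiveRefCntPtr<MyClass> Ptr1(new MyClass());
  // Copying bumps the refcount to 2; once both pointers are destroyed the
  // count reaches 0 and the object deletes itself.
  llvm::IntrusiveRefCntPtr<MyClass> Ptr2(Ptr1);
}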

View File

@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Compiler.h"
#include <algorithm>
@ -107,6 +108,39 @@ public:
return false;
}
/// Insert a sequence of new elements into the PriorityWorklist.
template <typename SequenceT>
typename std::enable_if<!std::is_convertible<SequenceT, T>::value>::type
insert(SequenceT &&Input) {
if (std::begin(Input) == std::end(Input))
// Nothing to do for an empty input sequence.
return;
// First pull the input sequence into the vector as a bulk append
// operation.
ptrdiff_t StartIndex = V.size();
V.insert(V.end(), std::begin(Input), std::end(Input));
// Now walk backwards fixing up the index map and deleting any duplicates.
for (ptrdiff_t i = V.size() - 1; i >= StartIndex; --i) {
auto InsertResult = M.insert({V[i], i});
if (InsertResult.second)
continue;
// If the existing index is before this insert's start, nuke that one and
// move it up.
ptrdiff_t &Index = InsertResult.first->second;
if (Index < StartIndex) {
V[Index] = T();
Index = i;
continue;
}
// Otherwise the existing one comes first so just clear out the value in
// this slot.
V[i] = T();
}
}
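A hedged usage sketch of the new bulk overload (element values illustrative; pop_back_val() assumed from the class's SetVector-style interface). Per the fix-up loop above, a duplicated element keeps only its most recent, highest-priority slot:

#include "llvm/ADT/PriorityWorklist.h"

void drain() {
  llvm::PriorityWorklist<int> Worklist;
  Worklist.insert(1);
  int NewItems[] = {2, 3, 1, 4};
  Worklist.insert(NewItems); // bulk append; the earlier '1' is superseded
  // Pops come out in priority order: 4, 1, 3, 2.
  while (!Worklist.empty()) {
    int Item = Worklist.pop_back_val();
    (void)Item; // process Item here
  }
}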
/// Remove the last element of the PriorityWorklist.
void pop_back() {
assert(!empty() && "Cannot remove an element when empty!");
@ -169,6 +203,11 @@ public:
return true;
}
/// Reverse the items in the PriorityWorklist.
///
/// This does an in-place reversal. Other kinds of reverse aren't easy to
/// support in the face of the worklist semantics.
/// Completely clear the PriorityWorklist
void clear() {
M.clear();

View File

@ -23,10 +23,9 @@ namespace llvm {
class DataLayout;
class MDNode;
/// isDereferenceablePointer - Return true if this is always a dereferenceable
/// pointer. If the context instruction is specified perform context-sensitive
/// analysis and return true if the pointer is dereferenceable at the
/// specified instruction.
/// Return true if this is always a dereferenceable pointer. If the context
/// instruction is specified perform context-sensitive analysis and return true
/// if the pointer is dereferenceable at the specified instruction.
bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr);
@ -40,8 +39,7 @@ bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr);
/// isSafeToLoadUnconditionally - Return true if we know that executing a load
/// from this value cannot trap.
/// Return true if we know that executing a load from this value cannot trap.
///
/// If DT and ScanFrom are specified this method performs context-sensitive
/// analysis and returns true if it is safe to load immediately before ScanFrom.
@ -54,12 +52,12 @@ bool isSafeToLoadUnconditionally(Value *V, unsigned Align,
Instruction *ScanFrom = nullptr,
const DominatorTree *DT = nullptr);
/// DefMaxInstsToScan - the default number of maximum instructions
/// to scan in the block, used by FindAvailableLoadedValue().
/// The default number of maximum instructions to scan in the block, used by
/// FindAvailableLoadedValue().
extern cl::opt<unsigned> DefMaxInstsToScan;
/// \brief Scan backwards to see if we have the value of the given load
/// available locally within a small number of instructions.
/// Scan backwards to see if we have the value of the given load available
/// locally within a small number of instructions.
///
/// You can use this function to scan across multiple blocks: after you call
/// this function, if ScanFrom points at the beginning of the block, it's safe

View File

@ -208,6 +208,8 @@ public:
SledKind Kind;
bool AlwaysInstrument;
const class Function *Fn;
void emit(int, MCStreamer *, const MCSymbol *) const;
};
// All the sleds to be emitted.
@ -216,6 +218,9 @@ public:
// Helper function to record a given XRay sled.
void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind);
/// Emit a table with all XRay instrumentation points.
void emitXRayTable();
//===------------------------------------------------------------------===//
// MachineFunctionPass Implementation.
//===------------------------------------------------------------------===//

View File

@ -59,6 +59,9 @@ class MachineDominatorTree : public MachineFunctionPass {
/// such as BB == elt.NewBB.
mutable SmallSet<MachineBasicBlock *, 32> NewBBs;
/// The DominatorTreeBase that is used to compute a normal dominator tree
DominatorTreeBase<MachineBasicBlock>* DT;
/// \brief Apply all the recorded critical edges to the DT.
/// This updates the underlying DT information in a way that uses
/// the fast query path of DT as much as possible.
@ -68,7 +71,6 @@ class MachineDominatorTree : public MachineFunctionPass {
public:
static char ID; // Pass ID, replacement for typeid
DominatorTreeBase<MachineBasicBlock>* DT;
MachineDominatorTree();

View File

@ -116,12 +116,12 @@ public:
// An unsigned integer indicating the identity of the source file
// corresponding to a machine instruction.
uint16_t File;
// An unsigned integer whose value encodes the applicable instruction set
// architecture for the current instruction.
uint8_t Isa;
// An unsigned integer representing the DWARF path discriminator value
// for this location.
uint32_t Discriminator;
// An unsigned integer whose value encodes the applicable instruction set
// architecture for the current instruction.
uint8_t Isa;
// A boolean indicating that the current instruction is the beginning of a
// statement.
uint8_t IsStmt:1,

View File

@ -104,6 +104,13 @@ def int_amdgcn_dispatch_id :
// Instruction Intrinsics
//===----------------------------------------------------------------------===//
// The first parameter is the s_sendmsg immediate (i16);
// the second one is copied to m0.
def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
Intrinsic<[], [], [IntrConvergent]>;

View File

@ -2063,130 +2063,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Vector extract and insert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_mask_vextractf32x4_512 :
GCCBuiltin<"__builtin_ia32_extractf32x4_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti32x4_512 :
GCCBuiltin<"__builtin_ia32_extracti32x4_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i32_ty, llvm_i32_ty,
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextractf32x4_256 :
GCCBuiltin<"__builtin_ia32_extractf32x4_256_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v8f32_ty, llvm_i32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti32x4_256 :
GCCBuiltin<"__builtin_ia32_extracti32x4_256_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i32_ty, llvm_i32_ty,
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextractf64x2_256 :
GCCBuiltin<"__builtin_ia32_extractf64x2_256_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v4f64_ty, llvm_i32_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti64x2_256 :
GCCBuiltin<"__builtin_ia32_extracti64x2_256_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty, llvm_i32_ty,
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextractf64x2_512 :
GCCBuiltin<"__builtin_ia32_extractf64x2_512_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v8f64_ty, llvm_i32_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti64x2_512 :
GCCBuiltin<"__builtin_ia32_extracti64x2_512_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v8i64_ty, llvm_i32_ty,
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextractf32x8_512 :
GCCBuiltin<"__builtin_ia32_extractf32x8_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v16f32_ty, llvm_i32_ty,
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti32x8_512 :
GCCBuiltin<"__builtin_ia32_extracti32x8_mask">,
Intrinsic<[llvm_v8i32_ty],[llvm_v16i32_ty, llvm_i32_ty,
llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextractf64x4_512 :
GCCBuiltin<"__builtin_ia32_extractf64x4_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v8f64_ty, llvm_i32_ty,
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti64x4_512 :
GCCBuiltin<"__builtin_ia32_extracti64x4_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v8i64_ty, llvm_i32_ty,
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_insertf32x4_256 :
GCCBuiltin<"__builtin_ia32_insertf32x4_256_mask">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_insertf32x4_512 :
GCCBuiltin<"__builtin_ia32_insertf32x4_mask">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_insertf32x8_512 :
GCCBuiltin<"__builtin_ia32_insertf32x8_mask">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_insertf64x2_256 :
GCCBuiltin<"__builtin_ia32_insertf64x2_256_mask">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_insertf64x2_512 :
GCCBuiltin<"__builtin_ia32_insertf64x2_512_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_insertf64x4_512 :
GCCBuiltin<"__builtin_ia32_insertf64x4_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_inserti32x4_256 :
GCCBuiltin<"__builtin_ia32_inserti32x4_256_mask">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_inserti32x4_512 :
GCCBuiltin<"__builtin_ia32_inserti32x4_mask">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_inserti32x8_512 :
GCCBuiltin<"__builtin_ia32_inserti32x8_mask">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_inserti64x2_256 :
GCCBuiltin<"__builtin_ia32_inserti64x2_256_mask">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_inserti64x2_512 :
GCCBuiltin<"__builtin_ia32_inserti64x2_512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_inserti64x4_512 :
GCCBuiltin<"__builtin_ia32_inserti64x4_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrNoMem]>;
}
// Conditional load ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_maskload_d : GCCBuiltin<"__builtin_ia32_maskloadd">,

View File

@ -769,17 +769,13 @@ namespace detail {
std::error_code directory_iterator_increment(DirIterState &);
std::error_code directory_iterator_destruct(DirIterState &);
/// DirIterState - Keeps state for the directory_iterator. It is reference
/// counted in order to preserve InputIterator semantics on copy.
struct DirIterState : public RefCountedBase<DirIterState> {
DirIterState()
: IterationHandle(0) {}
/// Keeps state for the directory_iterator.
struct DirIterState {
~DirIterState() {
directory_iterator_destruct(*this);
}
intptr_t IterationHandle;
intptr_t IterationHandle = 0;
directory_entry CurrentEntry;
};
} // end namespace detail
@ -788,23 +784,23 @@ namespace detail {
/// operator++ because we need an error_code. If it's really needed we can make
/// it call report_fatal_error on error.
class directory_iterator {
IntrusiveRefCntPtr<detail::DirIterState> State;
std::shared_ptr<detail::DirIterState> State;
public:
explicit directory_iterator(const Twine &path, std::error_code &ec) {
State = new detail::DirIterState;
State = std::make_shared<detail::DirIterState>();
SmallString<128> path_storage;
ec = detail::directory_iterator_construct(*State,
path.toStringRef(path_storage));
}
explicit directory_iterator(const directory_entry &de, std::error_code &ec) {
State = new detail::DirIterState;
State = std::make_shared<detail::DirIterState>();
ec = detail::directory_iterator_construct(*State, de.path());
}
/// Construct end iterator.
directory_iterator() : State(nullptr) {}
directory_iterator() = default;
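A minimal iteration sketch under this API (path illustrative, error handling reduced to a bail-out), using the increment() method declared just below; since copies now share their DirIterState through std::shared_ptr, the InputIterator copy semantics the old comment described are preserved:

#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"

void listDir() {
  std::error_code EC;
  for (llvm::sys::fs::directory_iterator It("/tmp", EC), End;
       It != End && !EC; It.increment(EC))
    llvm::errs() << It->path() << "\n";
}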
// No operator++ because we need error_code.
directory_iterator &increment(std::error_code &ec) {

View File

@ -209,6 +209,15 @@ struct DocumentListTraits {
// static T::value_type& element(IO &io, T &seq, size_t index);
};
/// This class should be specialized by any type that needs to be converted
/// to/from a YAML mapping in the case where the names of the keys are not known
/// in advance, e.g. a string map.
template <typename T>
struct CustomMappingTraits {
// static void inputOne(IO &io, StringRef key, T &elem);
// static void output(IO &io, T &elem);
};
// Only used for better diagnostics of missing traits
template <typename T>
struct MissingTrait;
@ -358,6 +367,23 @@ public:
static bool const value = (sizeof(test<SequenceTraits<T>>(nullptr)) == 1);
};
// Test if CustomMappingTraits<T> is defined on type T.
template <class T>
struct has_CustomMappingTraits
{
typedef void (*Signature_input)(IO &io, StringRef key, T &v);
template <typename U>
static char test(SameType<Signature_input, &U::inputOne>*);
template <typename U>
static double test(...);
public:
static bool const value =
(sizeof(test<CustomMappingTraits<T>>(nullptr)) == 1);
};
// has_FlowTraits<int> will cause an error with some compilers because
// it subclasses int. Using this wrapper instantiates the real has_FlowTraits
// only if the template type is a class.
@ -493,6 +519,7 @@ struct missingTraits
!has_BlockScalarTraits<T>::value &&
!has_MappingTraits<T, Context>::value &&
!has_SequenceTraits<T>::value &&
!has_CustomMappingTraits<T>::value &&
!has_DocumentListTraits<T>::value> {};
template <typename T, typename Context>
@ -531,6 +558,7 @@ public:
virtual void endMapping() = 0;
virtual bool preflightKey(const char*, bool, bool, bool &, void *&) = 0;
virtual void postflightKey(void*) = 0;
virtual std::vector<StringRef> keys() = 0;
virtual void beginFlowMapping() = 0;
virtual void endFlowMapping() = 0;
@ -818,6 +846,21 @@ yamlize(IO &io, T &Val, bool, Context &Ctx) {
}
}
template <typename T>
typename std::enable_if<has_CustomMappingTraits<T>::value, void>::type
yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
if ( io.outputting() ) {
io.beginMapping();
CustomMappingTraits<T>::output(io, Val);
io.endMapping();
} else {
io.beginMapping();
for (StringRef key : io.keys())
CustomMappingTraits<T>::inputOne(io, key, Val);
io.endMapping();
}
}
template <typename T>
typename std::enable_if<missingTraits<T, EmptyContext>::value, void>::type
yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
@ -1074,6 +1117,7 @@ private:
void endMapping() override;
bool preflightKey(const char *, bool, bool, bool &, void *&) override;
void postflightKey(void *) override;
std::vector<StringRef> keys() override;
void beginFlowMapping() override;
void endFlowMapping() override;
unsigned beginSequence() override;
@ -1154,10 +1198,8 @@ private:
typedef llvm::StringMap<std::unique_ptr<HNode>> NameToNode;
bool isValidKey(StringRef key);
NameToNode Mapping;
llvm::SmallVector<const char*, 6> ValidKeys;
llvm::SmallVector<std::string, 6> ValidKeys;
};
class SequenceHNode : public HNode {
@ -1215,6 +1257,7 @@ public:
void endMapping() override;
bool preflightKey(const char *key, bool, bool, bool &, void *&) override;
void postflightKey(void *) override;
std::vector<StringRef> keys() override;
void beginFlowMapping() override;
void endFlowMapping() override;
unsigned beginSequence() override;
@ -1384,6 +1427,17 @@ operator>>(Input &In, T &Val) {
return In;
}
// Define non-member operator>> so that Input can stream in a string map.
template <typename T>
inline
typename std::enable_if<has_CustomMappingTraits<T>::value, Input &>::type
operator>>(Input &In, T &Val) {
EmptyContext Ctx;
if (In.setCurrentDocument())
yamlize(In, Val, true, Ctx);
return In;
}
// Provide better error message about types missing a trait specialization
template <typename T>
inline typename std::enable_if<missingTraits<T, EmptyContext>::value,
@ -1457,6 +1511,21 @@ operator<<(Output &Out, T &Val) {
return Out;
}
// Define non-member operator<< so that Output can stream out a string map.
template <typename T>
inline
typename std::enable_if<has_CustomMappingTraits<T>::value, Output &>::type
operator<<(Output &Out, T &Val) {
EmptyContext Ctx;
Out.beginDocuments();
if (Out.preflightDocument(0)) {
yamlize(Out, Val, true, Ctx);
Out.postflightDocument();
}
Out.endDocuments();
return Out;
}
// Provide better error message about types missing a trait specialization
template <typename T>
inline typename std::enable_if<missingTraits<T, EmptyContext>::value,
@ -1476,6 +1545,18 @@ template <typename T> struct SequenceTraitsImpl {
}
};
/// Implementation of CustomMappingTraits for std::map<std::string, T>.
template <typename T> struct StdMapStringCustomMappingTraitsImpl {
typedef std::map<std::string, T> map_type;
static void inputOne(IO &io, StringRef key, map_type &v) {
io.mapRequired(key.str().c_str(), v[key]);
}
static void output(IO &io, map_type &v) {
for (auto &p : v)
io.mapRequired(p.first.c_str(), p.second);
}
};
} // end namespace yaml
} // end namespace llvm
@ -1530,4 +1611,15 @@ template <typename T> struct SequenceTraitsImpl {
} \
}
/// Utility for declaring that std::map<std::string, _type> should be considered
/// a YAML map.
#define LLVM_YAML_IS_STRING_MAP(_type) \
namespace llvm { \
namespace yaml { \
template <> \
struct CustomMappingTraits<std::map<std::string, _type>> \
: public StdMapStringCustomMappingTraitsImpl<_type> {}; \
} \
}
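A hedged round-trip sketch of the new macro (map contents illustrative; assumes yaml::Input and yaml::Output behave as declared above):

#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <string>

LLVM_YAML_IS_STRING_MAP(int)

void roundTrip() {
  std::map<std::string, int> Counts = {{"bar", 2}, {"foo", 1}};
  std::string YAML;
  llvm::raw_string_ostream OS(YAML);
  llvm::yaml::Output Out(OS);
  Out << Counts; // CustomMappingTraits::output() walks the map in key order

  std::map<std::string, int> Parsed;
  llvm::yaml::Input In(OS.str());
  In >> Parsed; // Input::keys() feeds each document key to inputOne()
}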
#endif // LLVM_SUPPORT_YAMLTRAITS_H

View File

@ -2542,9 +2542,6 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
return !CFP->getValueAPF().isNegZero();
// FIXME: Magic number! At the least, this should be given a name because it's
// used similarly in CannotBeOrderedLessThanZero(). A better fix may be to
// expose it as a parameter, so it can be used for testing / experimenting.
if (Depth == MaxDepth)
return false; // Limit search depth.
@ -2589,9 +2586,6 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V,
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero();
// FIXME: Magic number! At the least, this should be given a name because it's
// used similarly in CannotBeNegativeZero(). A better fix may be to
// expose it as a parameter, so it can be used for testing / experimenting.
if (Depth == MaxDepth)
return false; // Limit search depth.

View File

@ -749,7 +749,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
// handles the case where this is type ODRed with a definition needed
// by the importing module, in which case the existing definition is
// used.
if (IsImporting && !ImportFullTypeDefinitions &&
if (IsImporting && !ImportFullTypeDefinitions && Identifier &&
(Tag == dwarf::DW_TAG_enumeration_type ||
Tag == dwarf::DW_TAG_class_type ||
Tag == dwarf::DW_TAG_structure_type ||

View File

@ -272,28 +272,10 @@ static const Value *getNoopInput(const Value *V,
TLI.allowTruncateForTailCall(Op->getType(), I->getType())) {
DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits());
NoopInput = Op;
} else if (isa<CallInst>(I)) {
// Look through call (skipping callee)
for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 1;
i != e; ++i) {
unsigned attrInd = i - I->op_begin() + 1;
if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
NoopInput = *i;
break;
}
}
} else if (isa<InvokeInst>(I)) {
// Look through invoke (skipping BB, BB, Callee)
for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 3;
i != e; ++i) {
unsigned attrInd = i - I->op_begin() + 1;
if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
NoopInput = *i;
break;
}
}
} else if (auto CS = ImmutableCallSite(I)) {
const Value *ReturnedOp = CS.getReturnedArgOperand();
if (ReturnedOp && isNoopBitcast(ReturnedOp->getType(), I->getType(), TLI))
NoopInput = ReturnedOp;
} else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) {
// Value may come from either the aggregate or the scalar
ArrayRef<unsigned> InsertLoc = IVI->getIndices();

View File

@ -37,6 +37,8 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
@ -2610,6 +2612,61 @@ AsmPrinterHandler::~AsmPrinterHandler() {}
void AsmPrinterHandler::markFunctionEnd() {}
// In the binary's "xray_instr_map" section, an array of these function entries
// describes each instrumentation point. When XRay patches your code, the index
// into this table will be given to your handler as a patch point identifier.
void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out,
const MCSymbol *CurrentFnSym) const {
Out->EmitSymbolValue(Sled, Bytes);
Out->EmitSymbolValue(CurrentFnSym, Bytes);
auto Kind8 = static_cast<uint8_t>(Kind);
Out->EmitBytes(StringRef(reinterpret_cast<const char *>(&Kind8), 1));
Out->EmitBytes(
StringRef(reinterpret_cast<const char *>(&AlwaysInstrument), 1));
Out->EmitZeros(2 * Bytes - 2); // Pad the previous two entries
}
void AsmPrinter::emitXRayTable() {
if (Sleds.empty())
return;
auto PrevSection = OutStreamer->getCurrentSectionOnly();
auto Fn = MF->getFunction();
MCSection *Section = nullptr;
if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) {
if (Fn->hasComdat()) {
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
Fn->getComdat()->getName());
} else {
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC);
}
} else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) {
Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
SectionKind::getReadOnlyWithRel());
} else {
llvm_unreachable("Unsupported target");
}
// Before we switch over, we force a reference to a label inside the
// xray_instr_map section. Since this function is always called just
// before the function's end, we assume that this is happening after
// the last return instruction.
auto WordSizeBytes = TM.getPointerSize();
MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true);
OutStreamer->EmitCodeAlignment(16);
OutStreamer->EmitSymbolValue(Tmp, WordSizeBytes, false);
OutStreamer->SwitchSection(Section);
OutStreamer->EmitLabel(Tmp);
for (const auto &Sled : Sleds)
Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym);
OutStreamer->SwitchSection(PrevSection);
Sleds.clear();
}
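For reference, the bytes emit() writes per sled add up to four machine words. An assumed 64-bit layout (struct and field names are illustrative, not from an LLVM header):

#include <cstdint>

struct XRaySledEntry64 {
  uint64_t Address;         // EmitSymbolValue(Sled, Bytes)
  uint64_t Function;        // EmitSymbolValue(CurrentFnSym, Bytes)
  uint8_t Kind;             // one byte from the SledKind enum
  uint8_t AlwaysInstrument; // one byte
  uint8_t Padding[14];      // EmitZeros(2 * Bytes - 2)
};
static_assert(sizeof(XRaySledEntry64) == 32, "four 8-byte words per entry");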
void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
SledKind Kind) {
auto Fn = MI.getParent()->getParent()->getFunction();

View File

@ -1124,7 +1124,7 @@ void HoistSpillHelper::rmRedundantSpills(
// earlier spill with smaller SlotIndex.
for (const auto CurrentSpill : Spills) {
MachineBasicBlock *Block = CurrentSpill->getParent();
MachineDomTreeNode *Node = MDT.DT->getNode(Block);
MachineDomTreeNode *Node = MDT.getBase().getNode(Block);
MachineInstr *PrevSpill = SpillBBToSpill[Node];
if (PrevSpill) {
SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill);
@ -1132,9 +1132,9 @@ void HoistSpillHelper::rmRedundantSpills(
MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill;
MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill;
SpillsToRm.push_back(SpillToRm);
SpillBBToSpill[MDT.DT->getNode(Block)] = SpillToKeep;
SpillBBToSpill[MDT.getBase().getNode(Block)] = SpillToKeep;
} else {
SpillBBToSpill[MDT.DT->getNode(Block)] = CurrentSpill;
SpillBBToSpill[MDT.getBase().getNode(Block)] = CurrentSpill;
}
}
for (const auto SpillToRm : SpillsToRm)
@ -1209,7 +1209,7 @@ void HoistSpillHelper::getVisitOrders(
// Sort the nodes in WorkSet in top-down order and save the nodes
// in Orders. Orders will be used for hoisting in runHoistSpills.
unsigned idx = 0;
Orders.push_back(MDT.DT->getNode(Root));
Orders.push_back(MDT.getBase().getNode(Root));
do {
MachineDomTreeNode *Node = Orders[idx++];
const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();

View File

@ -4277,7 +4277,8 @@ struct BaseIndexOffset {
}
/// Parses tree in Ptr for base, index, offset addresses.
static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) {
static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG,
int64_t PartialOffset = 0) {
bool IsIndexSignExt = false;
// Split up a folded GlobalAddress+Offset into its component parts.
@ -4286,7 +4287,7 @@ struct BaseIndexOffset {
return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
SDLoc(GA),
GA->getValueType(0),
/*Offset=*/0,
/*Offset=*/PartialOffset,
/*isTargetGA=*/false,
GA->getTargetFlags()),
SDValue(),
@ -4298,14 +4299,13 @@ struct BaseIndexOffset {
// instruction, then it could be just the BASE or everything else we don't
// know how to handle. Just use Ptr as BASE and give up.
if (Ptr->getOpcode() != ISD::ADD)
return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
// We know that we have at least an ADD instruction. Try to pattern match
// the simple case of BASE + OFFSET.
if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
IsIndexSignExt);
return match(Ptr->getOperand(0), DAG, Offset + PartialOffset);
}
// Inside a loop the current BASE pointer is calculated using an ADD and a
@ -4314,7 +4314,7 @@ struct BaseIndexOffset {
// (i64 mul (i64 %induction_var)
// (i64 %element_size)))
if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
// Look at Base + Index + Offset cases.
SDValue Base = Ptr->getOperand(0);
@ -4328,14 +4328,14 @@ struct BaseIndexOffset {
// Either the case of Base + Index (no offset) or something else.
if (IndexOffset->getOpcode() != ISD::ADD)
return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt);
// Now we have the case of Base + Index + offset.
SDValue Index = IndexOffset->getOperand(0);
SDValue Offset = IndexOffset->getOperand(1);
if (!isa<ConstantSDNode>(Offset))
return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
// Ignore signextends.
if (Index->getOpcode() == ISD::SIGN_EXTEND) {
@ -4344,7 +4344,7 @@ struct BaseIndexOffset {
} else IsIndexSignExt = false;
int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt);
}
};
} // namespace
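Stripped of the SelectionDAG plumbing, the change threads constant offsets through the recursion instead of stopping at the first BASE + OFFSET match. A plain-C++ sketch of that accumulation (types and names hypothetical):

#include <cstdint>

struct AddNode {
  const AddNode *Base = nullptr; // nullptr marks a leaf base pointer
  int64_t Imm = 0;               // constant operand of this ADD
};

int64_t matchOffset(const AddNode &N, int64_t PartialOffset = 0) {
  if (N.Base) // (add Base, Imm): fold Imm and keep decomposing Base
    return matchOffset(*N.Base, PartialOffset + N.Imm);
  return PartialOffset; // leaf: return everything folded so far
}
// For (add (add Base, 8), 16) this yields 24, mirroring how match() now
// produces a single BaseIndexOffset for chained constant adds.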

View File

@ -88,15 +88,15 @@ void OProfileJITEventListener::NotifyObjectEmitted(
// Use symbol info to iterate functions in the object.
for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) {
SymbolRef Sym = P.first;
if (Sym.getType() != SymbolRef::ST_Function)
if (!Sym.getType() || *Sym.getType() != SymbolRef::ST_Function)
continue;
ErrorOr<StringRef> NameOrErr = Sym.getName();
if (NameOrErr.getError())
Expected<StringRef> NameOrErr = Sym.getName();
if (!NameOrErr)
continue;
StringRef Name = *NameOrErr;
ErrorOr<uint64_t> AddrOrErr = Sym.getAddress();
if (AddrOrErr.getError())
Expected<uint64_t> AddrOrErr = Sym.getAddress();
if (!AddrOrErr)
continue;
uint64_t Addr = *AddrOrErr;
uint64_t Size = P.second;
@ -128,9 +128,9 @@ void OProfileJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
for (symbol_iterator I = DebugObj.symbol_begin(),
E = DebugObj.symbol_end();
I != E; ++I) {
if (I->getType() == SymbolRef::ST_Function) {
ErrorOr<uint64_t> AddrOrErr = I->getAddress();
if (AddrOrErr.getError())
if (I->getType() && *I->getType() == SymbolRef::ST_Function) {
Expected<uint64_t> AddrOrErr = I->getAddress();
if (!AddrOrErr)
continue;
uint64_t Addr = *AddrOrErr;
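The hunks above migrate from ErrorOr to Expected. One caveat, stated as an assumption rather than something this diff shows: under LLVM's checked-error builds, an Expected that holds an error should be consumed, so a defensive variant of the pattern looks like this (hypothetical helper):

#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"

// Returns an empty name instead of propagating the error.
llvm::StringRef nameOrEmpty(const llvm::object::SymbolRef &Sym) {
  llvm::Expected<llvm::StringRef> NameOrErr = Sym.getName();
  if (!NameOrErr) {
    llvm::consumeError(NameOrErr.takeError()); // deliberately drop the error
    return llvm::StringRef();
  }
  return *NameOrErr;
}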

View File

@ -342,8 +342,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
Name.startswith("avx.vinsertf128.") || // Added in 3.7
Name == "avx2.vinserti128" || // Added in 3.7
Name.startswith("avx512.mask.insert") || // Added in 4.0
Name.startswith("avx.vextractf128.") || // Added in 3.7
Name == "avx2.vextracti128" || // Added in 3.7
Name.startswith("avx512.mask.vextract") || // Added in 4.0
Name.startswith("sse4a.movnt.") || // Added in 3.9
Name.startswith("avx.movnt.") || // Added in 3.2
Name.startswith("avx512.storent.") || // Added in 3.9
@ -1150,21 +1152,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
} else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
Name == "avx2.vinserti128")) {
Name == "avx2.vinserti128" ||
Name.startswith("avx512.mask.insert"))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
VectorType *VecTy = cast<VectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
unsigned DstNumElts = CI->getType()->getVectorNumElements();
unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
unsigned Scale = DstNumElts / SrcNumElts;
// Mask off the high bits of the immediate value; hardware ignores those.
Imm = Imm & 1;
Imm = Imm % Scale;
// Extend the second operand into a vector that is twice as big.
// Extend the second operand into a vector the size of the destination.
Value *UndefV = UndefValue::get(Op1->getType());
SmallVector<uint32_t, 8> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i)
SmallVector<uint32_t, 8> Idxs(DstNumElts);
for (unsigned i = 0; i != SrcNumElts; ++i)
Idxs[i] = i;
for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
Idxs[i] = SrcNumElts;
Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
// Insert the second operand into the first operand.
@ -1178,33 +1184,41 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
// Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
// The low half of the result is either the low half of the 1st operand
// or the low half of the 2nd operand (the inserted vector).
for (unsigned i = 0; i != NumElts / 2; ++i)
Idxs[i] = Imm ? i : (i + NumElts);
// The high half of the result is either the low half of the 2nd operand
// (the inserted vector) or the high half of the 1st operand.
for (unsigned i = NumElts / 2; i != NumElts; ++i)
Idxs[i] = Imm ? (i + NumElts / 2) : i;
// First fill with the identity mask.
for (unsigned i = 0; i != DstNumElts; ++i)
Idxs[i] = i;
// Then replace the elements where we need to insert.
for (unsigned i = 0; i != SrcNumElts; ++i)
Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
// If the intrinsic has a mask operand, handle that.
if (CI->getNumArgOperands() == 5)
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
Name == "avx2.vextracti128")) {
Name == "avx2.vextracti128" ||
Name.startswith("avx512.mask.vextract"))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
VectorType *VecTy = cast<VectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
unsigned DstNumElts = CI->getType()->getVectorNumElements();
unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
unsigned Scale = SrcNumElts / DstNumElts;
// Mask off the high bits of the immediate value; hardware ignores those.
Imm = Imm & 1;
Imm = Imm % Scale;
// Get indexes for either the high half or low half of the input vector.
SmallVector<uint32_t, 4> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
Idxs[i] = Imm ? (i + NumElts) : i;
// Get indexes for the subvector of the input vector.
SmallVector<uint32_t, 8> Idxs(DstNumElts);
for (unsigned i = 0; i != DstNumElts; ++i) {
Idxs[i] = i + (Imm * DstNumElts);
}
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
Value *UndefV = UndefValue::get(Op0->getType());
Rep = Builder.CreateShuffleVector(Op0, UndefV, Idxs);
// If the intrinsic has a mask operand, handle that.
if (CI->getNumArgOperands() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (!IsX86 && Name == "stackprotectorcheck") {
Rep = nullptr;
} else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||

View File

@ -891,23 +891,17 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
AddStream, Cache);
// Partition numbers for ThinLTO jobs start at 1 (see comments for
// GlobalResolution in LTO.h). Task numbers, however, start at
// ParallelCodeGenParallelismLevel if an LTO module is present, as tasks 0
// through ParallelCodeGenParallelismLevel-1 are reserved for parallel code
// generation partitions.
// Task numbers start at ParallelCodeGenParallelismLevel if an LTO
// module is present, as tasks 0 through ParallelCodeGenParallelismLevel-1
// are reserved for parallel code generation partitions.
unsigned Task =
HasRegularLTO ? RegularLTO.ParallelCodeGenParallelismLevel : 0;
unsigned Partition = 1;
for (auto &Mod : ThinLTO.ModuleMap) {
if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first],
ExportLists[Mod.first],
ResolvedODR[Mod.first], ThinLTO.ModuleMap))
return E;
++Task;
++Partition;
}
return BackendProc->wait();

View File

@ -76,8 +76,12 @@ namespace llvm {
compile-time arithmetic on PPC double-double numbers, it is not able
to represent all possible values held by a PPC double-double number,
for example: (long double) 1.0 + (long double) 0x1p-106
Should this be replaced by a full emulation of PPC double-double? */
static const fltSemantics semPPCDoubleDouble = {0, 0, 0, 0};
Should this be replaced by a full emulation of PPC double-double?
Note: we need to make the value different from semBogus as otherwise
an unsafe optimization may collapse both values to a single address,
and we heavily rely on them having distinct addresses. */
static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
/* There are temporary semantics for the real PPCDoubleDouble implementation.
Currently, APFloat of PPCDoubleDouble holds one PPCDoubleDoubleImpl as the

View File

@ -1069,6 +1069,7 @@ StringRef sys::getHostCPUName() {
.Case("POWER7", "pwr7")
.Case("POWER8", "pwr8")
.Case("POWER8E", "pwr8")
.Case("POWER8NVL", "pwr8")
.Case("POWER9", "pwr9")
.Default(generic);
}

View File

@ -239,10 +239,7 @@ void llvm::write_double(raw_ostream &S, double N, FloatStyle Style,
N *= 100.0;
char Buf[32];
unsigned Len;
Len = format(Spec.c_str(), N).snprint(Buf, sizeof(Buf));
if (Style == FloatStyle::Percent)
++Len;
format(Spec.c_str(), N).snprint(Buf, sizeof(Buf));
S << Buf;
if (Style == FloatStyle::Percent)
S << '%';
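The dropped ++Len compensated for the '%' before it was actually written; since Len was otherwise unused, the call now simply fills Buf. Expected behavior, assuming the style's default precision of two digits:

#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

void demo() {
  std::string Str;
  llvm::raw_string_ostream S(Str);
  llvm::write_double(S, 0.5, llvm::FloatStyle::Percent);
  // S.str() == "50.00%": scaled by 100, formatted, then '%' appended.
}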

View File

@ -118,6 +118,18 @@ void Input::beginMapping() {
}
}
std::vector<StringRef> Input::keys() {
MapHNode *MN = dyn_cast<MapHNode>(CurrentNode);
std::vector<StringRef> Ret;
if (!MN) {
setError(CurrentNode, "not a mapping");
return Ret;
}
for (auto &P : MN->Mapping)
Ret.push_back(P.first());
return Ret;
}
bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault,
void *&SaveInfo) {
UseDefault = false;
@ -163,7 +175,7 @@ void Input::endMapping() {
if (!MN)
return;
for (const auto &NN : MN->Mapping) {
if (!MN->isValidKey(NN.first())) {
if (!is_contained(MN->ValidKeys, NN.first())) {
setError(NN.second.get(), Twine("unknown key '") + NN.first() + "'");
break;
}
@ -373,14 +385,6 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
}
}
bool Input::MapHNode::isValidKey(StringRef Key) {
for (const char *K : ValidKeys) {
if (Key.equals(K))
return true;
}
return false;
}
void Input::setError(const Twine &Message) {
this->setError(CurrentNode, Message);
}
@ -451,6 +455,10 @@ void Output::endMapping() {
StateStack.pop_back();
}
std::vector<StringRef> Output::keys() {
report_fatal_error("invalid call");
}
bool Output::preflightKey(const char *Key, bool Required, bool SameAsDefault,
bool &UseDefault, void *&) {
UseDefault = false;

View File

@ -11,9 +11,15 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/TableGen/StringMatcher.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/StringMatcher.h"
#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>
using namespace llvm;
/// FindFirstNonCommonLetter - Find the first character in the keys of the
@ -67,7 +73,7 @@ EmitStringMatcherForChar(const std::vector<const StringPair*> &Matches,
}
// Bucket the matches by the character we are comparing.
std::map<char, std::vector<const StringPair*> > MatchesByLetter;
std::map<char, std::vector<const StringPair*>> MatchesByLetter;
for (unsigned i = 0, e = Matches.size(); i != e; ++i)
MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]);
@ -91,7 +97,7 @@ EmitStringMatcherForChar(const std::vector<const StringPair*> &Matches,
// FIXME: Need to escape general strings.
OS << Indent << "if (memcmp(" << StrVariableName << ".data()+" << CharNo
<< ", \"" << Matches[0]->first.substr(CharNo, NumChars) << "\", "
<< NumChars << "))\n";
<< NumChars << ") != 0)\n";
OS << Indent << " break;\n";
}
@ -103,7 +109,7 @@ EmitStringMatcherForChar(const std::vector<const StringPair*> &Matches,
OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n";
OS << Indent << "default: break;\n";
for (std::map<char, std::vector<const StringPair*> >::iterator LI =
for (std::map<char, std::vector<const StringPair*>>::iterator LI =
MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) {
// TODO: escape hard stuff (like \n) if we ever care about it.
OS << Indent << "case '" << LI->first << "':\t // "
@ -118,7 +124,6 @@ EmitStringMatcherForChar(const std::vector<const StringPair*> &Matches,
return true;
}
/// Emit - Top level entry point.
///
void StringMatcher::Emit(unsigned Indent) const {
@ -126,7 +131,7 @@ void StringMatcher::Emit(unsigned Indent) const {
if (Matches.empty()) return;
// First level categorization: group strings by length.
std::map<unsigned, std::vector<const StringPair*> > MatchesByLength;
std::map<unsigned, std::vector<const StringPair*>> MatchesByLength;
for (unsigned i = 0, e = Matches.size(); i != e; ++i)
MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]);
@ -136,7 +141,7 @@ void StringMatcher::Emit(unsigned Indent) const {
OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n";
OS.indent(Indent*2+2) << "default: break;\n";
for (std::map<unsigned, std::vector<const StringPair*> >::iterator LI =
for (std::map<unsigned, std::vector<const StringPair*>>::iterator LI =
MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) {
OS.indent(Indent*2+2) << "case " << LI->first << ":\t // "
<< LI->second.size()
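For orientation, the code these loops emit for the matches {"add", "sub"} has roughly the following shape (illustrative only, including the new explicit != 0 comparison; the enum and wrapper function are hypothetical):

#include "llvm/ADT/StringRef.h"
#include <cstring>

enum class Opcode { Add, Sub, Unknown };

Opcode lookup(llvm::StringRef Str) {
  switch (Str.size()) {
  default: break;
  case 3:  // 2 strings to match.
    switch (Str[0]) {
    default: break;
    case 'a':  // 1 string to match.
      if (memcmp(Str.data()+1, "dd", 2) != 0)
        break;
      return Opcode::Add;  // "add"
    case 's':  // 1 string to match.
      if (memcmp(Str.data()+1, "ub", 2) != 0)
        break;
      return Opcode::Sub;  // "sub"
    }
    break;
  }
  return Opcode::Unknown;
}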

View File

@ -264,9 +264,13 @@ def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
"Qualcomm Falkor processors", [
FeatureCRC,
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
FeatureNEON,
FeaturePerfMon
FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
FeatureZCZeroing
]>;
def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan",

View File

@ -76,7 +76,6 @@ public:
void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
void EmitXRayTable();
void EmitSled(const MachineInstr &MI, SledKind Kind);
/// \brief tblgen'erated driver function for lowering simple MI->MC
@ -95,7 +94,7 @@ public:
AArch64FI = F.getInfo<AArch64FunctionInfo>();
STI = static_cast<const AArch64Subtarget*>(&F.getSubtarget());
bool Result = AsmPrinter::runOnMachineFunction(F);
EmitXRayTable();
emitXRayTable();
return Result;
}
@ -150,59 +149,6 @@ void AArch64AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI)
EmitSled(MI, SledKind::TAIL_CALL);
}
void AArch64AsmPrinter::EmitXRayTable()
{
//TODO: merge the logic for ELF XRay sleds at a higher level, to avoid
// the code duplication that currently exists across x86_64, ARM32 and AArch64.
if (Sleds.empty())
return;
auto PrevSection = OutStreamer->getCurrentSectionOnly();
auto Fn = MF->getFunction();
MCSection *Section;
if (STI->isTargetELF()) {
if (Fn->hasComdat())
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
Fn->getComdat()->getName());
else
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC);
} else if (STI->isTargetMachO()) {
Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
SectionKind::getReadOnlyWithRel());
} else {
llvm_unreachable("Unsupported target");
}
// Before we switch over, we force a reference to a label inside the
// xray_instr_map section. Since EmitXRayTable() is always called just
// before the function's end, we assume that this is happening after the
// last return instruction.
//
// We then align the reference to 16 byte boundaries, which we determined
// experimentally to be beneficial to avoid causing decoder stalls.
MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true);
OutStreamer->EmitCodeAlignment(16);
OutStreamer->EmitSymbolValue(Tmp, 8, false);
OutStreamer->SwitchSection(Section);
OutStreamer->EmitLabel(Tmp);
for (const auto &Sled : Sleds) {
OutStreamer->EmitSymbolValue(Sled.Sled, 8);
OutStreamer->EmitSymbolValue(CurrentFnSym, 8);
auto Kind = static_cast<uint8_t>(Sled.Kind);
OutStreamer->EmitBytes(
StringRef(reinterpret_cast<const char *>(&Kind), 1));
OutStreamer->EmitBytes(
StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
OutStreamer->EmitZeros(14);
}
OutStreamer->SwitchSection(PrevSection);
Sleds.clear();
}
void AArch64AsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
{
static const int8_t NoopsInSledCount = 7;

View File

@ -1470,6 +1470,9 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
bool IsUnscaled = TII->isUnscaledLdSt(MI);
int Offset = getLdStOffsetOp(MI).getImm();
int OffsetStride = IsUnscaled ? getMemScale(MI) : 1;
// Allow one more for offset.
if (Offset > 0)
Offset -= OffsetStride;
if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
return false;

View File

@ -3048,6 +3048,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(KILL)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
NODE_NAME_CASE(SENDMSG)
NODE_NAME_CASE(SENDMSGHALT)
NODE_NAME_CASE(INTERP_MOV)
NODE_NAME_CASE(INTERP_P1)
NODE_NAME_CASE(INTERP_P2)

View File

@ -313,6 +313,7 @@ enum NodeType : unsigned {
/// Pointer to the start of the shader's constant data.
CONST_DATA_PTR,
SENDMSG,
SENDMSGHALT,
INTERP_MOV,
INTERP_P1,
INTERP_P2,

View File

@ -266,6 +266,10 @@ def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG",
SDTypeProfile<0, 1, [SDTCisInt<0>]>,
[SDNPHasChain, SDNPInGlue]>;
def AMDGPUsendmsghalt : SDNode<"AMDGPUISD::SENDMSGHALT",
SDTypeProfile<0, 1, [SDTCisInt<0>]>,
[SDNPHasChain, SDNPInGlue]>;
def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
SDTypeProfile<1, 3, [SDTCisFP<0>]>,
[SDNPInGlue]>;

View File

@ -2706,12 +2706,19 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntrinsicID) {
case AMDGPUIntrinsic::SI_sendmsg: {
case AMDGPUIntrinsic::SI_sendmsg:
case Intrinsic::amdgcn_s_sendmsg: {
Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
SDValue Glue = Chain.getValue(1);
return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain,
Op.getOperand(2), Glue);
}
case Intrinsic::amdgcn_s_sendmsghalt: {
Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
SDValue Glue = Chain.getValue(1);
return DAG.getNode(AMDGPUISD::SENDMSGHALT, DL, MVT::Other, Chain,
Op.getOperand(2), Glue);
}
case AMDGPUIntrinsic::SI_tbuffer_store: {
SDValue Ops[] = {
Chain,

View File

@ -504,7 +504,7 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
return;
// There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
if (LastInstWritesM0 && (I->getOpcode() == AMDGPU::S_SENDMSG ||
                         I->getOpcode() == AMDGPU::S_SENDMSGHALT)) {
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
LastInstWritesM0 = false;
return;
@ -619,7 +619,8 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
// signalling other hardware blocks
if ((I->getOpcode() == AMDGPU::S_BARRIER &&
ST->needWaitcntBeforeBarrier()) ||
I->getOpcode() == AMDGPU::S_SENDMSG)
I->getOpcode() == AMDGPU::S_SENDMSG ||
I->getOpcode() == AMDGPU::S_SENDMSGHALT)
Required = LastIssued;
else
Required = handleOperands(*I);

View File

@ -828,9 +828,12 @@ let Uses = [EXEC, M0] in {
def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
[(AMDGPUsendmsg (i32 imm:$simm16))]
>;
def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16",
[(AMDGPUsendmsghalt (i32 imm:$simm16))]
>;
} // End Uses = [EXEC, M0]
def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16">;
def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;
def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
let simm16 = 0;

View File

@ -164,9 +164,6 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit the rest of the function body.
EmitFunctionBody();
// Emit the XRay table for this function.
EmitXRayTable();
// If we need V4T thumb mode Register Indirect Jump pads, emit them.
// These are created per function, rather than per TU, since it's
// relatively easy to exceed the thumb branch range within a TU.

View File

@ -113,9 +113,6 @@ public:
void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
// Helper function that emits the XRay sleds we've collected for a particular
// function.
void EmitXRayTable();
private:
void EmitSled(const MachineInstr &MI, SledKind Kind);

View File

@ -22,9 +22,6 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
using namespace llvm;
@ -226,38 +223,3 @@ void ARMAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI)
{
EmitSled(MI, SledKind::TAIL_CALL);
}
void ARMAsmPrinter::EmitXRayTable()
{
if (Sleds.empty())
return;
MCSection *Section = nullptr;
if (Subtarget->isTargetELF()) {
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_GROUP |
ELF::SHF_MERGE,
0, CurrentFnSym->getName());
} else if (Subtarget->isTargetMachO()) {
Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
SectionKind::getReadOnlyWithRel());
} else {
llvm_unreachable("Unsupported target");
}
auto PrevSection = OutStreamer->getCurrentSectionOnly();
OutStreamer->SwitchSection(Section);
for (const auto &Sled : Sleds) {
OutStreamer->EmitSymbolValue(Sled.Sled, 4);
OutStreamer->EmitSymbolValue(CurrentFnSym, 4);
auto Kind = static_cast<uint8_t>(Sled.Kind);
OutStreamer->EmitBytes(
StringRef(reinterpret_cast<const char *>(&Kind), 1));
OutStreamer->EmitBytes(
StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
OutStreamer->EmitZeros(6);
}
OutStreamer->SwitchSection(PrevSection);
Sleds.clear();
}

View File

@ -53,28 +53,36 @@
//
// The code below is intended to be fully target-independent.
#include "BitTracker.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "BitTracker.h"
#include <cassert>
#include <cstdint>
#include <iterator>
using namespace llvm;
typedef BitTracker BT;
namespace {
// Local trickery to pretty print a register (without the whole "%vreg"
// business).
struct printv {
printv(unsigned r) : R(r) {}
unsigned R;
};
raw_ostream &operator<< (raw_ostream &OS, const printv &PV) {
if (PV.R)
OS << 'v' << TargetRegisterInfo::virtReg2Index(PV.R);
@ -82,9 +90,11 @@ namespace {
OS << 's';
return OS;
}
}
} // end anonymous namespace
namespace llvm {
raw_ostream &operator<<(raw_ostream &OS, const BT::BitValue &BV) {
switch (BV.Type) {
case BT::BitValue::Top:
@ -167,14 +177,14 @@ namespace llvm {
return OS;
}
}
} // end namespace llvm
void BitTracker::print_cells(raw_ostream &OS) const {
for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I)
dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n";
}
BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F)
: Trace(false), ME(E), MF(F), MRI(F.getRegInfo()), Map(*new CellMapType) {}
@ -182,7 +192,6 @@ BitTracker::~BitTracker() {
delete &Map;
}
// If we were allowed to update a cell for a part of a register, the meet
// operation would need to be parametrized by the register number and the
// exact part of the register, so that the computer BitRefs correspond to
@ -201,7 +210,6 @@ bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) {
return Changed;
}
// Insert the entire cell RC into the current cell at position given by M.
BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC,
const BitMask &M) {
@ -224,7 +232,6 @@ BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC,
return *this;
}
BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const {
uint16_t B = M.first(), E = M.last(), W = width();
assert(B < W && E < W);
@ -243,7 +250,6 @@ BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const {
return RC;
}
BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) {
// Rotate left (i.e. towards increasing bit indices).
// Swap the two parts: [0..W-Sh-1] [W-Sh..W-1]
@ -265,7 +271,6 @@ BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) {
return *this;
}
BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E,
const BitValue &V) {
assert(B <= E);
@ -274,7 +279,6 @@ BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E,
return *this;
}
BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) {
// Append the cell given as the argument to the "this" cell.
// Bit 0 of RC becomes bit W of the result, where W is this->width().
@ -285,7 +289,6 @@ BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) {
return *this;
}
uint16_t BT::RegisterCell::ct(bool B) const {
uint16_t W = width();
uint16_t C = 0;
@ -295,7 +298,6 @@ uint16_t BT::RegisterCell::ct(bool B) const {
return C;
}
uint16_t BT::RegisterCell::cl(bool B) const {
uint16_t W = width();
uint16_t C = 0;
@ -305,7 +307,6 @@ uint16_t BT::RegisterCell::cl(bool B) const {
return C;
}
bool BT::RegisterCell::operator== (const RegisterCell &RC) const {
uint16_t W = Bits.size();
if (RC.Bits.size() != W)
@ -316,7 +317,6 @@ bool BT::RegisterCell::operator== (const RegisterCell &RC) const {
return true;
}
uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const {
// The general problem is with finding a register class that corresponds
// to a given reference reg:sub. There can be several such classes, and
@ -342,7 +342,6 @@ uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const {
return BW;
}
BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR,
const CellMapType &M) const {
uint16_t BW = getRegBitWidth(RR);
@ -370,7 +369,6 @@ BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR,
return RegisterCell::top(BW);
}
void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC,
CellMapType &M) const {
// While updating the cell map can be done in a meaningful way for
@ -388,7 +386,6 @@ void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC,
M[RR.Reg] = RC;
}
// Check if the cell represents a compile-time integer value.
bool BT::MachineEvaluator::isInt(const RegisterCell &A) const {
uint16_t W = A.width();
@ -398,7 +395,6 @@ bool BT::MachineEvaluator::isInt(const RegisterCell &A) const {
return true;
}
// Convert a cell to the integer value. The result must fit in uint64_t.
uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const {
assert(isInt(A));
@ -411,7 +407,6 @@ uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const {
return Val;
}
// Evaluator helper functions. These implement some common operation on
// register cells that can be used to implement target-specific instructions
// in a target-specific evaluator.
@ -426,7 +421,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(int64_t V, uint16_t W) const {
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const {
const APInt &A = CI->getValue();
uint16_t BW = A.getBitWidth();
@ -437,7 +431,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const {
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();
@ -471,7 +464,6 @@ BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();
@ -505,29 +497,26 @@ BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eMLS(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width() + A2.width();
uint16_t Z = A1.ct(0) + A2.ct(0);
uint16_t Z = A1.ct(false) + A2.ct(false);
RegisterCell Res(W);
Res.fill(0, Z, BitValue::Zero);
Res.fill(Z, W, BitValue::self());
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eMLU(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width() + A2.width();
uint16_t Z = A1.ct(0) + A2.ct(0);
uint16_t Z = A1.ct(false) + A2.ct(false);
RegisterCell Res(W);
Res.fill(0, Z, BitValue::Zero);
Res.fill(Z, W, BitValue::self());
return Res;
}
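The switch to ct(false) makes the bool argument explicit: it counts known trailing Zero bits. Both multiply evaluators rely on the fact that a product has at least as many trailing zeros as its operands combined; a quick illustration with plain integers (not the RegisterCell machinery):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0x14; // 0b10100: 2 trailing zeros
  uint32_t B = 0x18; // 0b11000: 3 trailing zeros
  // The low 2 + 3 = 5 bits of the product are guaranteed to be zero.
  assert(((A * B) & 0x1Fu) == 0);
  return 0;
}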
BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1,
uint16_t Sh) const {
assert(Sh <= A1.width());
@ -537,7 +526,6 @@ BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1,
uint16_t Sh) const {
uint16_t W = A1.width();
@ -548,7 +536,6 @@ BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1,
uint16_t Sh) const {
uint16_t W = A1.width();
@ -560,7 +547,6 @@ BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();
@ -583,7 +569,6 @@ BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();
@ -606,7 +591,6 @@ BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();
@ -627,7 +611,6 @@ BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const {
uint16_t W = A1.width();
RegisterCell Res(W);
@ -643,7 +626,6 @@ BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const {
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1,
uint16_t BitN) const {
assert(BitN < A1.width());
@ -652,7 +634,6 @@ BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1,
uint16_t BitN) const {
assert(BitN < A1.width());
@ -661,7 +642,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B,
uint16_t W) const {
uint16_t C = A1.cl(B), AW = A1.width();
@ -672,7 +652,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B,
return RegisterCell::self(0, W);
}
BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B,
uint16_t W) const {
uint16_t C = A1.ct(B), AW = A1.width();
@ -683,7 +662,6 @@ BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B,
return RegisterCell::self(0, W);
}
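// Sketch of the intended behavior (for a hypothetical cell A): eCLB(A,
// false, 32) yields the number of leading zeros of A as a 32-bit immediate
// cell when that count is fully determined by A's known bits, and an opaque
// self-referential cell otherwise; eCTB does the same for trailing bits.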
BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1,
uint16_t FromN) const {
uint16_t W = A1.width();
@ -695,7 +673,6 @@ BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1,
uint16_t FromN) const {
uint16_t W = A1.width();
@ -705,7 +682,6 @@ BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1,
return Res;
}
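// For instance, eZXT(A, 8) keeps bits [0..7] of A and forces bits [8..W)
// to Zero, while eSXT(A, 8) replicates bit 7 of A across bits [8..W).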
BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1,
uint16_t B, uint16_t E) const {
uint16_t W = A1.width();
@ -718,7 +694,6 @@ BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1,
return Res;
}
BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1,
const RegisterCell &A2, uint16_t AtN) const {
uint16_t W1 = A1.width(), W2 = A2.width();
@ -731,7 +706,6 @@ BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1,
return Res;
}
BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const {
assert(Sub == 0 && "Generic BitTracker::mask called for Sub != 0");
uint16_t W = getRegBitWidth(Reg);
@ -785,7 +759,6 @@ bool BT::MachineEvaluator::evaluate(const MachineInstr &MI,
return true;
}
// Main W-Z implementation.
void BT::visitPHI(const MachineInstr &PI) {
@ -977,7 +950,6 @@ void BT::visitBranchesFrom(const MachineInstr &BI) {
}
}
void BT::visitUsesOf(unsigned Reg) {
if (Trace)
dbgs() << "visiting uses of " << PrintReg(Reg, &ME.TRI) << "\n";
@ -997,17 +969,14 @@ void BT::visitUsesOf(unsigned Reg) {
}
}
BT::RegisterCell BT::get(RegisterRef RR) const {
return ME.getCell(RR, Map);
}
void BT::put(RegisterRef RR, const RegisterCell &RC) {
ME.putCell(RR, RC, Map);
}
// Replace all references to bits from OldRR with the corresponding bits
// in NewRR.
void BT::subst(RegisterRef OldRR, RegisterRef NewRR) {
@ -1033,7 +1002,6 @@ void BT::subst(RegisterRef OldRR, RegisterRef NewRR) {
}
}
// Check if the block has been "executed" during propagation. (If not, the
// block is dead, but it may still appear to be reachable.)
bool BT::reached(const MachineBasicBlock *B) const {
@ -1047,7 +1015,6 @@ bool BT::reached(const MachineBasicBlock *B) const {
return false;
}
// Visit an individual instruction. This could be a newly added instruction,
// or one that has been modified by an optimization.
void BT::visit(const MachineInstr &MI) {
@ -1061,14 +1028,12 @@ void BT::visit(const MachineInstr &MI) {
FlowQ.pop();
}
void BT::reset() {
EdgeExec.clear();
InstrExec.clear();
Map.clear();
}
void BT::run() {
reset();
assert(FlowQ.empty());
@ -1141,4 +1106,3 @@ void BT::run() {
if (Trace)
print_cells(dbgs() << "Cells after propagation:\n");
}


@ -1,4 +1,4 @@
//===--- BitTracker.h -----------------------------------------------------===//
//===--- BitTracker.h -------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -7,24 +7,27 @@
//
//===----------------------------------------------------------------------===//
#ifndef BITTRACKER_H
#define BITTRACKER_H
#ifndef LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H
#define LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
#include <cassert>
#include <cstdint>
#include <map>
#include <queue>
#include <set>
#include <utility>
namespace llvm {
class ConstantInt;
class MachineRegisterInfo;
class MachineBasicBlock;
class MachineInstr;
class MachineOperand;
class raw_ostream;
class ConstantInt;
class MachineRegisterInfo;
class MachineBasicBlock;
class MachineInstr;
class raw_ostream;
struct BitTracker {
struct BitRef;
@ -76,19 +79,19 @@ private:
CellMapType &Map;
};
// Abstraction of a reference to bit at position Pos from a register Reg.
struct BitTracker::BitRef {
BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {}
bool operator== (const BitRef &BR) const {
// If Reg is 0, disregard Pos.
return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos);
}
unsigned Reg;
uint16_t Pos;
};
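// For example (R being any nonzero register number):
//   BitRef(0, 3) == BitRef(0, 7)   // true: Reg == 0, so Pos is disregarded
//   BitRef(R, 3) == BitRef(R, 7)   // false: the positions differ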
// Abstraction of a register reference in MachineOperand. It contains the
// register number and the subregister index.
struct BitTracker::RegisterRef {
@ -96,10 +99,10 @@ struct BitTracker::RegisterRef {
: Reg(R), Sub(S) {}
RegisterRef(const MachineOperand &MO)
: Reg(MO.getReg()), Sub(MO.getSubReg()) {}
unsigned Reg, Sub;
};
// Value that a single bit can take. This is outside of the context of
// any register, it is more of an abstraction of the two-element set of
// possible bit values. One extension here is the "Ref" type, which
@ -158,6 +161,7 @@ struct BitTracker::BitValue {
bool operator!= (const BitValue &V) const {
return !operator==(V);
}
bool is(unsigned T) const {
assert(T == 0 || T == 1);
return T == 0 ? Type == Zero
@ -209,6 +213,7 @@ struct BitTracker::BitValue {
bool num() const {
return Type == Zero || Type == One;
}
operator bool() const {
assert(Type == Zero || Type == One);
return Type == One;
@ -217,7 +222,6 @@ struct BitTracker::BitValue {
friend raw_ostream &operator<<(raw_ostream &OS, const BitValue &BV);
};
// This operation must be idempotent, i.e. ref(ref(V)) == ref(V).
inline BitTracker::BitValue
BitTracker::BitValue::ref(const BitValue &V) {
@ -228,25 +232,25 @@ BitTracker::BitValue::ref(const BitValue &V) {
return self();
}
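// Illustration: if V refers to bit 5 of register R, then ref(V) is again a
// reference to bit 5 of R, so ref(ref(V)) == ref(V) as required above.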
inline BitTracker::BitValue
BitTracker::BitValue::self(const BitRef &Self) {
return BitValue(Self.Reg, Self.Pos);
}
// A sequence of bits starting from index B up to and including index E.
// If E < B, the mask represents two sections: [0..E] and [B..W) where
// W is the width of the register.
struct BitTracker::BitMask {
BitMask() : B(0), E(0) {}
BitMask() = default;
BitMask(uint16_t b, uint16_t e) : B(b), E(e) {}
uint16_t first() const { return B; }
uint16_t last() const { return E; }
private:
uint16_t B, E;
};
private:
uint16_t B = 0;
uint16_t E = 0;
};
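// Example: on a 32-bit register, BitMask(8, 15) covers the contiguous bits
// [8..15], while BitMask(24, 7) (E < B) covers the two sections [0..7] and
// [24..31].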
// Representation of a register: a list of BitValues.
struct BitTracker::RegisterCell {
@ -255,6 +259,7 @@ struct BitTracker::RegisterCell {
uint16_t width() const {
return Bits.size();
}
const BitValue &operator[](uint16_t BitN) const {
assert(BitN < Bits.size());
return Bits[BitN];
@ -297,12 +302,10 @@ private:
friend raw_ostream &operator<<(raw_ostream &OS, const RegisterCell &RC);
};
inline bool BitTracker::has(unsigned Reg) const {
return Map.find(Reg) != Map.end();
}
inline const BitTracker::RegisterCell&
BitTracker::lookup(unsigned Reg) const {
CellMapType::const_iterator F = Map.find(Reg);
@ -310,7 +313,6 @@ BitTracker::lookup(unsigned Reg) const {
return F->second;
}
inline BitTracker::RegisterCell
BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) {
RegisterCell RC(Width);
@ -319,7 +321,6 @@ BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) {
return RC;
}
inline BitTracker::RegisterCell
BitTracker::RegisterCell::top(uint16_t Width) {
RegisterCell RC(Width);
@ -328,7 +329,6 @@ BitTracker::RegisterCell::top(uint16_t Width) {
return RC;
}
inline BitTracker::RegisterCell
BitTracker::RegisterCell::ref(const RegisterCell &C) {
uint16_t W = C.width();
@ -345,12 +345,13 @@ BitTracker::RegisterCell::ref(const RegisterCell &C) {
struct BitTracker::MachineEvaluator {
MachineEvaluator(const TargetRegisterInfo &T, MachineRegisterInfo &M)
: TRI(T), MRI(M) {}
virtual ~MachineEvaluator() {}
virtual ~MachineEvaluator() = default;
uint16_t getRegBitWidth(const RegisterRef &RR) const;
RegisterCell getCell(const RegisterRef &RR, const CellMapType &M) const;
void putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const;
// A result of any operation should use refs to the source cells, not
// the cells directly. This function is a convenience wrapper to quickly
// generate a ref for a cell corresponding to a register reference.
@ -435,4 +436,4 @@ struct BitTracker::MachineEvaluator {
} // end namespace llvm
#endif
#endif // LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H


@ -7,16 +7,30 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "Hexagon.h"
#include "HexagonBitTracker.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonTargetMachine.h"
#include "HexagonBitTracker.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <utility>
#include <vector>
using namespace llvm;
@ -76,11 +90,11 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri,
}
}
BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
using namespace Hexagon;
if (Sub == 0)
return MachineEvaluator::mask(Reg, 0);
using namespace Hexagon;
const TargetRegisterClass *RC = MRI.getRegClass(Reg);
unsigned ID = RC->getID();
uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub));
@ -102,6 +116,7 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
}
namespace {
class RegisterRefs {
std::vector<BT::RegisterRef> Vector;
@ -117,17 +132,21 @@ public:
}
size_t size() const { return Vector.size(); }
const BT::RegisterRef &operator[](unsigned n) const {
// The main purpose of this operator is to assert on a bad argument.
assert(n < Vector.size());
return Vector[n];
}
};
}
} // end anonymous namespace
bool HexagonEvaluator::evaluate(const MachineInstr &MI,
const CellMapType &Inputs,
CellMapType &Outputs) const {
using namespace Hexagon;
unsigned NumDefs = 0;
// Sanity verification: there should not be any defs with subregisters.
@ -142,7 +161,6 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
if (NumDefs == 0)
return false;
using namespace Hexagon;
unsigned Opc = MI.getOpcode();
if (MI.mayLoad()) {
@ -779,10 +797,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
case S2_cl0:
case S2_cl0p:
// Always produce a 32-bit result.
return rr0(eCLB(rc(1), 0/*bit*/, 32), Outputs);
return rr0(eCLB(rc(1), false/*bit*/, 32), Outputs);
case S2_cl1:
case S2_cl1p:
return rr0(eCLB(rc(1), 1/*bit*/, 32), Outputs);
return rr0(eCLB(rc(1), true/*bit*/, 32), Outputs);
case S2_clb:
case S2_clbp: {
uint16_t W1 = getRegBitWidth(Reg[1]);
@ -794,10 +812,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
}
case S2_ct0:
case S2_ct0p:
return rr0(eCTB(rc(1), 0/*bit*/, 32), Outputs);
return rr0(eCTB(rc(1), false/*bit*/, 32), Outputs);
case S2_ct1:
case S2_ct1p:
return rr0(eCTB(rc(1), 1/*bit*/, 32), Outputs);
return rr0(eCTB(rc(1), true/*bit*/, 32), Outputs);
case S5_popcountp:
// TODO
break;
@ -953,6 +971,8 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI,
bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
const CellMapType &Inputs,
CellMapType &Outputs) const {
using namespace Hexagon;
if (TII.isPredicated(MI))
return false;
assert(MI.mayLoad() && "A load that mayn't?");
@ -960,7 +980,6 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
uint16_t BitNum;
bool SignEx;
using namespace Hexagon;
switch (Opc) {
default:
@ -1141,9 +1160,9 @@ bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr &MI,
return true;
}
unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const {
using namespace Hexagon;
bool Is64 = DoubleRegsRegClass.contains(PReg);
assert(PReg == 0 || Is64 || IntRegsRegClass.contains(PReg));
@ -1180,7 +1199,6 @@ unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const {
return (Idx64+1 < Num64) ? Phys64[Idx64+1] : 0;
}
unsigned HexagonEvaluator::getVirtRegFor(unsigned PReg) const {
typedef MachineRegisterInfo::livein_iterator iterator;
for (iterator I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) {


@ -1,4 +1,4 @@
//===--- HexagonBitTracker.h ----------------------------------------------===//
//===--- HexagonBitTracker.h ------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -7,15 +7,17 @@
//
//===----------------------------------------------------------------------===//
#ifndef HEXAGONBITTRACKER_H
#define HEXAGONBITTRACKER_H
#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H
#define LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H
#include "BitTracker.h"
#include "llvm/ADT/DenseMap.h"
#include <cstdint>
namespace llvm {
class HexagonInstrInfo;
class HexagonRegisterInfo;
class HexagonInstrInfo;
class HexagonRegisterInfo;
struct HexagonEvaluator : public BitTracker::MachineEvaluator {
typedef BitTracker::CellMapType CellMapType;
@ -49,10 +51,12 @@ private:
// Type of formal parameter extension.
struct ExtType {
enum { SExt, ZExt };
char Type;
uint16_t Width;
ExtType() : Type(0), Width(0) {}
ExtType() = default;
ExtType(char t, uint16_t w) : Type(t), Width(w) {}
char Type = 0;
uint16_t Width = 0;
};
// Map VR -> extension type.
typedef DenseMap<unsigned, ExtType> RegExtMap;
@ -61,4 +65,4 @@ private:
} // end namespace llvm
#endif
#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H

File diff suppressed because it is too large


@ -16,9 +16,14 @@
#include "HexagonRegisterInfo.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/Target/TargetInstrInfo.h"
#include <cstdint>
#include <vector>
#define GET_INSTRINFO_HEADER
#include "HexagonGenInstrInfo.inc"
@ -29,9 +34,10 @@ struct EVT;
class HexagonSubtarget;
class HexagonInstrInfo : public HexagonGenInstrInfo {
virtual void anchor();
const HexagonRegisterInfo RI;
virtual void anchor();
public:
explicit HexagonInstrInfo(HexagonSubtarget &ST);
@ -260,7 +266,7 @@ public:
/// PredCost.
unsigned getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr &MI,
unsigned *PredCost = 0) const override;
unsigned *PredCost = nullptr) const override;
/// Create machine specific model for scheduling.
DFAPacketizer *
@ -378,7 +384,6 @@ public:
bool PredOpcodeHasJMP_c(unsigned Opcode) const;
bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const;
short getAbsoluteForm(const MachineInstr &MI) const;
unsigned getAddrMode(const MachineInstr &MI) const;
unsigned getBaseAndOffset(const MachineInstr &MI, int &Offset,
@ -421,13 +426,11 @@ public:
unsigned getUnits(const MachineInstr &MI) const;
unsigned getValidSubTargets(const unsigned Opcode) const;
/// getInstrTimingClassLatency - Compute the instruction latency of a given
/// instruction using Timing Class information, if available.
unsigned nonDbgBBSize(const MachineBasicBlock *BB) const;
unsigned nonDbgBundleSize(MachineBasicBlock::const_iterator BundleHead) const;
void immediateExtend(MachineInstr &MI) const;
bool invertAndChangeJumpTarget(MachineInstr &MI,
MachineBasicBlock* NewTarget) const;
@ -438,6 +441,6 @@ public:
short xformRegToImmOffset(const MachineInstr &MI) const;
};
}
} // end namespace llvm
#endif
#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H


@ -15,33 +15,31 @@
namespace llvm {
namespace Hexagon {
namespace Hexagon {
const unsigned int StartPacket = 0x1;
const unsigned int EndPacket = 0x2;
}
} // end namespace Hexagon
/// Hexagon target-specific information for each MachineFunction.
class HexagonMachineFunctionInfo : public MachineFunctionInfo {
// SRetReturnReg - Some subtargets require that sret lowering includes
// returning the value of the returned struct in a register. This field
// holds the virtual register into which the sret argument is passed.
unsigned SRetReturnReg;
unsigned StackAlignBaseVReg; // Aligned-stack base register (virtual)
unsigned StackAlignBasePhysReg; // (physical)
unsigned SRetReturnReg = 0;
unsigned StackAlignBaseVReg = 0; // Aligned-stack base register (virtual)
unsigned StackAlignBasePhysReg = 0; // (physical)
int VarArgsFrameIndex;
bool HasClobberLR;
bool HasEHReturn;
bool HasClobberLR = false;
bool HasEHReturn = false;
std::map<const MachineInstr*, unsigned> PacketInfo;
virtual void anchor();
public:
HexagonMachineFunctionInfo() : SRetReturnReg(0), StackAlignBaseVReg(0),
StackAlignBasePhysReg(0), HasClobberLR(0), HasEHReturn(false) {}
HexagonMachineFunctionInfo() = default;
HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0),
StackAlignBaseVReg(0), StackAlignBasePhysReg(0), HasClobberLR(0),
HasEHReturn(false) {}
HexagonMachineFunctionInfo(MachineFunction &MF) {}
unsigned getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
@ -75,6 +73,7 @@ public:
void setStackAlignBasePhysReg(unsigned R) { StackAlignBasePhysReg = R; }
unsigned getStackAlignBasePhysReg() const { return StackAlignBasePhysReg; }
};
} // End llvm namespace
#endif
} // end namespace llvm
#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H


@ -10,17 +10,27 @@
// This file contains the declarations of the HexagonTargetAsmInfo properties.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "hexagon-sdata"
#include "HexagonTargetMachine.h"
#include "HexagonTargetObjectFile.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@ -44,13 +54,21 @@ static cl::opt<bool> TraceGVPlacement("trace-gv-placement",
// (e.g. -debug and -debug-only=globallayout)
#define TRACE_TO(s, X) s << X
#ifdef NDEBUG
#define TRACE(X) do { if (TraceGVPlacement) { TRACE_TO(errs(), X); } } while (0)
#define TRACE(X) \
do { \
if (TraceGVPlacement) { \
TRACE_TO(errs(), X); \
} \
} while (false)
#else
#define TRACE(X) \
do { \
if (TraceGVPlacement) { TRACE_TO(errs(), X); } \
else { DEBUG( TRACE_TO(dbgs(), X) ); } \
} while (0)
#define TRACE(X) \
do { \
if (TraceGVPlacement) { \
TRACE_TO(errs(), X); \
} else { \
DEBUG(TRACE_TO(dbgs(), X)); \
} \
} while (false)
#endif
// Returns true if the section name is such that the symbol will be put
@ -69,7 +87,6 @@ static bool isSmallDataSection(StringRef Sec) {
Sec.find(".scommon.") != StringRef::npos;
}
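// For example, a section whose name contains ".scommon." (such as
// ".scommon.4.bar") is treated as a small-data section.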
static const char *getSectionSuffixForSize(unsigned Size) {
switch (Size) {
default:
@ -163,7 +180,6 @@ MCSection *HexagonTargetObjectFile::getExplicitSectionGlobal(
return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, Kind, TM);
}
/// Return true if this global value should be placed into small data/bss
/// section.
bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
@ -232,17 +248,14 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
return true;
}
bool HexagonTargetObjectFile::isSmallDataEnabled() const {
return SmallDataThreshold > 0;
}
unsigned HexagonTargetObjectFile::getSmallDataSize() const {
return SmallDataThreshold;
}
/// Descends any type down to "elementary" components,
/// discovering the smallest addressable one.
/// If zero is returned, the declaration will not be modified.


@ -1,5 +1,4 @@
//=== HexagonMCCompound.cpp - Hexagon Compound checker -------===//
//=== HexagonMCCompound.cpp - Hexagon Compound checker -------------------===//
//
// The LLVM Compiler Infrastructure
//
@ -11,18 +10,17 @@
// This file looks at a packet and tries to form compound insns
//
//===----------------------------------------------------------------------===//
#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCShuffler.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAssembler.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
using namespace llvm;
using namespace Hexagon;
@ -79,8 +77,7 @@ static const unsigned cmpgtn1BitOpcode[8] = {
};
// enum HexagonII::CompoundGroup
namespace {
unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
static unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
unsigned DstReg, SrcReg, Src1Reg, Src2Reg;
switch (MI.getOpcode()) {
@ -173,11 +170,9 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
return HexagonII::HCG_None;
}
}
/// getCompoundOp - Return the index from 0-7 into the above opcode lists.
namespace {
unsigned getCompoundOp(MCInst const &HMCI) {
static unsigned getCompoundOp(MCInst const &HMCI) {
const MCOperand &Predicate = HMCI.getOperand(0);
unsigned PredReg = Predicate.getReg();
@ -198,11 +193,10 @@ unsigned getCompoundOp(MCInst const &HMCI) {
return (PredReg == Hexagon::P0) ? tp0_jump_t : tp1_jump_t;
}
}
}
namespace {
MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
MCInst *CompoundInsn = 0;
static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
MCInst const &R) {
MCInst *CompoundInsn = nullptr;
unsigned compoundOpcode;
MCOperand Rs, Rt;
int64_t Value;
@ -336,12 +330,10 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
return CompoundInsn;
}
}
/// Non-symmetrical. See if these two instructions are fit to form a compound pair.
namespace {
bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
MCInst const &MIb, bool IsExtendedB) {
static bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
MCInst const &MIb, bool IsExtendedB) {
unsigned MIaG = getCompoundCandidateGroup(MIa, IsExtendedA);
unsigned MIbG = getCompoundCandidateGroup(MIb, IsExtendedB);
// We have two candidates - check that this is the same register
@ -353,10 +345,9 @@ bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
return ((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) &&
(MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg()));
}
}
namespace {
bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
static bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context,
MCInst &MCI) {
assert(HexagonMCInstrInfo::isBundle(MCI));
bool JExtended = false;
for (MCInst::iterator J =
@ -367,8 +358,7 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
JExtended = true;
continue;
}
if (llvm::HexagonMCInstrInfo::getType(MCII, *JumpInst) ==
HexagonII::TypeJ) {
if (HexagonMCInstrInfo::getType(MCII, *JumpInst) == HexagonII::TypeJ) {
// Try to pair with another insn (B)undled with jump.
bool BExtended = false;
for (MCInst::iterator B =
@ -401,7 +391,6 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
}
return false;
}
}
/// tryCompound - Given a bundle, check for compound insns; when one
/// is found, update the contents of the bundle with the compound insn.
@ -420,6 +409,4 @@ void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII,
// a compound is found.
while (lookForCompound(MCII, Context, MCI))
;
return;
}


@ -1,4 +1,4 @@
//===--- RDFCopy.h --------------------------------------------------------===//
//===--- RDFCopy.h ----------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -7,23 +7,26 @@
//
//===----------------------------------------------------------------------===//
#ifndef RDF_COPY_H
#define RDF_COPY_H
#ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
#define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
#include "RDFGraph.h"
#include <map>
#include <vector>
namespace llvm {
class MachineBasicBlock;
class MachineDominatorTree;
class MachineInstr;
namespace rdf {
struct CopyPropagation {
CopyPropagation(DataFlowGraph &dfg) : MDT(dfg.getDT()), DFG(dfg),
Trace(false) {}
virtual ~CopyPropagation() {}
virtual ~CopyPropagation() = default;
bool run();
void trace(bool On) { Trace = On; }
@ -49,7 +52,9 @@ namespace rdf {
void updateMap(NodeAddr<InstrNode*> IA);
bool scanBlock(MachineBasicBlock *B);
};
} // namespace rdf
} // namespace llvm
#endif
} // end namespace rdf
} // end namespace llvm
#endif // LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H


@ -10,16 +10,31 @@
// Target-independent, SSA-based data flow graph for register data flow (RDF).
//
#include "RDFGraph.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominanceFrontier.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <utility>
#include <vector>
using namespace llvm;
using namespace rdf;
@ -88,14 +103,12 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) {
return OS;
}
namespace {
void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA,
const DataFlowGraph &G) {
OS << Print<NodeId>(RA.Id, G) << '<'
<< Print<RegisterRef>(RA.Addr->getRegRef(G), G) << '>';
if (RA.Addr->getFlags() & NodeAttrs::Fixed)
OS << '!';
}
static void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA,
const DataFlowGraph &G) {
OS << Print<NodeId>(RA.Id, G) << '<'
<< Print<RegisterRef>(RA.Addr->getRegRef(G), G) << '>';
if (RA.Addr->getFlags() & NodeAttrs::Fixed)
OS << '!';
}
template<>
@ -183,9 +196,11 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeSet> &P) {
}
namespace {
template <typename T>
struct PrintListV {
PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {}
typedef T Type;
const NodeList &List;
const DataFlowGraph &G;
@ -201,7 +216,8 @@ namespace {
}
return OS;
}
}
} // end anonymous namespace
template<>
raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) {
@ -219,10 +235,10 @@ raw_ostream &operator<< (raw_ostream &OS,
// Print the target for calls and branches (for readability).
if (MI.isCall() || MI.isBranch()) {
MachineInstr::const_mop_iterator T =
find_if(MI.operands(),
[] (const MachineOperand &Op) -> bool {
return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
});
llvm::find_if(MI.operands(),
[] (const MachineOperand &Op) -> bool {
return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
});
if (T != MI.operands_end()) {
OS << ' ';
if (T->isMBB())
@ -327,8 +343,8 @@ raw_ostream &operator<< (raw_ostream &OS,
return OS;
}
} // namespace rdf
} // namespace llvm
} // end namespace rdf
} // end namespace llvm
// Node allocation functions.
//
@ -390,7 +406,6 @@ void NodeAllocator::clear() {
ActiveEnd = nullptr;
}
// Insert node NA after "this" in the circular chain.
void NodeBase::append(NodeAddr<NodeBase*> NA) {
NodeId Nx = Next;
@ -401,7 +416,6 @@ void NodeBase::append(NodeAddr<NodeBase*> NA) {
}
}
// Fundamental node manipulator functions.
// Obtain the register reference from a reference node.
@ -590,7 +604,6 @@ NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) {
return findBlock(EntryB, G);
}
// Target operand information.
//
@ -641,7 +654,6 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
return false;
}
RegisterRef RegisterAggr::normalize(RegisterRef RR) const {
RegisterId SuperReg = RR.Reg;
while (true) {
@ -745,7 +757,6 @@ void RegisterAggr::print(raw_ostream &OS) const {
OS << " }";
}
//
// The data flow graph construction.
//
@ -753,10 +764,9 @@ void RegisterAggr::print(raw_ostream &OS) const {
DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi)
: LMI(), MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) {
: MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) {
}
// The implementation of the definition stack.
// Each register reference has its own definition stack. In particular,
// for a register, references "Reg" and "Reg:subreg" will each have their
@ -845,7 +855,6 @@ unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
return P;
}
// Register information.
// Get the list of references aliased to RR. Lane masks are ignored.
@ -915,7 +924,6 @@ NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) {
return NA;
}
// Allocation routines for specific node types/kinds.
NodeAddr<UseNode*> DataFlowGraph::newUse(NodeAddr<InstrNode*> Owner,
@ -1248,7 +1256,6 @@ bool DataFlowGraph::alias(RegisterRef RA, RegisterRef RB) const {
return false;
}
// Clear all information in the graph.
void DataFlowGraph::reset() {
Memory.clear();
@ -1256,7 +1263,6 @@ void DataFlowGraph::reset() {
Func = NodeAddr<FuncNode*>();
}
// Return the next reference node in the instruction node IA that is related
// to RA. Conceptually, two reference nodes are related if they refer to the
// same instance of a register access, but differ in flags or other minor


@ -1,4 +1,4 @@
//===--- RDFGraph.h -------------------------------------------------------===//
//===--- RDFGraph.h ---------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -221,20 +221,25 @@
// The statement s5 has two use nodes for t0: u7" and u9". The quotation
// mark " indicates that the node is a shadow.
//
#ifndef RDF_GRAPH_H
#define RDF_GRAPH_H
#ifndef LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
#define LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Timer.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>
#include <cstdint>
#include <cstring>
#include <functional>
#include <map>
#include <set>
#include <unordered_map>
#include <utility>
#include <vector>
// RDF uses uint32_t to refer to registers. This is to ensure that the type
@ -243,6 +248,7 @@
static_assert(sizeof(uint32_t) == sizeof(unsigned), "Those should be equal");
namespace llvm {
class MachineBasicBlock;
class MachineFunction;
class MachineInstr;
@ -252,6 +258,7 @@ namespace llvm {
class TargetInstrInfo;
namespace rdf {
typedef uint32_t NodeId;
typedef uint32_t RegisterId;
@ -293,9 +300,11 @@ namespace rdf {
static uint16_t set_type(uint16_t A, uint16_t T) {
return (A & ~TypeMask) | T;
}
static uint16_t set_kind(uint16_t A, uint16_t K) {
return (A & ~KindMask) | K;
}
static uint16_t set_flags(uint16_t A, uint16_t F) {
return (A & ~FlagMask) | F;
}
@ -326,9 +335,14 @@ namespace rdf {
};
template <typename T> struct NodeAddr {
NodeAddr() : Addr(nullptr), Id(0) {}
NodeAddr() : Addr(nullptr) {}
NodeAddr(T A, NodeId I) : Addr(A), Id(I) {}
// Type cast (casting constructor). The reason for having this class
// instead of std::pair.
template <typename S> NodeAddr(const NodeAddr<S> &NA)
: Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
bool operator== (const NodeAddr<T> &NA) const {
assert((Addr == NA.Addr) == (Id == NA.Id));
return Addr == NA.Addr;
@ -336,13 +350,9 @@ namespace rdf {
bool operator!= (const NodeAddr<T> &NA) const {
return !operator==(NA);
}
// Type cast (casting constructor). The reason for having this class
// instead of std::pair.
template <typename S> NodeAddr(const NodeAddr<S> &NA)
: Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
T Addr;
NodeId Id;
NodeId Id = 0;
};
struct NodeBase;
@ -366,17 +376,20 @@ namespace rdf {
struct NodeAllocator {
// Amount of storage for a single node.
enum { NodeMemSize = 32 };
NodeAllocator(uint32_t NPB = 4096)
: NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)),
IndexMask((1 << BitsPerIndex)-1), ActiveEnd(nullptr) {
IndexMask((1 << BitsPerIndex)-1) {
assert(isPowerOf2_32(NPB));
}
NodeBase *ptr(NodeId N) const {
uint32_t N1 = N-1;
uint32_t BlockN = N1 >> BitsPerIndex;
uint32_t Offset = (N1 & IndexMask) * NodeMemSize;
return reinterpret_cast<NodeBase*>(Blocks[BlockN]+Offset);
}
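// E.g., with the default 4096 nodes per block (BitsPerIndex == 12), id 1
// resolves to block 0 at offset 0, and id 4097 to block 1 at offset 0.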
NodeId id(const NodeBase *P) const;
NodeAddr<NodeBase*> New();
void clear();
@ -384,6 +397,7 @@ namespace rdf {
private:
void startNewBlock();
bool needNewBlock();
uint32_t makeId(uint32_t Block, uint32_t Index) const {
// Add 1 to the id, to avoid the id of 0, which is treated as "null".
return ((Block << BitsPerIndex) | Index) + 1;
@ -392,7 +406,7 @@ namespace rdf {
const uint32_t NodesPerBlock;
const uint32_t BitsPerIndex;
const uint32_t IndexMask;
char *ActiveEnd;
char *ActiveEnd = nullptr;
std::vector<char*> Blocks;
typedef BumpPtrAllocatorImpl<MallocAllocator, 65536> AllocatorTy;
AllocatorTy MemPool;
@ -405,6 +419,7 @@ namespace rdf {
RegisterRef() : RegisterRef(0) {}
explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll())
: Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {}
operator bool() const { return Reg != 0 && Mask.any(); }
bool operator== (const RegisterRef &RR) const {
return Reg == RR.Reg && Mask == RR.Mask;
@ -420,7 +435,8 @@ namespace rdf {
struct TargetOperandInfo {
TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {}
virtual ~TargetOperandInfo() {}
virtual ~TargetOperandInfo() = default;
virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const;
virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const;
virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const;
@ -428,7 +444,6 @@ namespace rdf {
const TargetInstrInfo &TII;
};
// Packed register reference. Only used for storage.
struct PackedRegisterRef {
RegisterId Reg;
@ -442,11 +457,13 @@ namespace rdf {
template <typename T, unsigned N = 32>
struct IndexedSet {
IndexedSet() : Map() { Map.reserve(N); }
T get(uint32_t Idx) const {
// Index Idx corresponds to Map[Idx-1].
assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size());
return Map[Idx-1];
}
uint32_t insert(T Val) {
// Linear search.
auto F = llvm::find(Map, Val);
@ -455,11 +472,13 @@ namespace rdf {
Map.push_back(Val);
return Map.size(); // Return actual_index + 1.
}
uint32_t find(T Val) const {
auto F = llvm::find(Map, Val);
assert(F != Map.end());
return F - Map.begin();
}
private:
std::vector<T> Map;
};
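// Usage sketch: the indices handed out are 1-based, with 0 reserved as an
// invalid id.
//   IndexedSet<unsigned> S;
//   uint32_t Id = S.insert(42);   // returns 1 on an empty set
//   assert(S.get(Id) == 42);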
@ -478,12 +497,14 @@ namespace rdf {
assert(LM.any());
return LM.all() ? 0 : find(LM);
}
PackedRegisterRef pack(RegisterRef RR) {
return { RR.Reg, getIndexForLaneMask(RR.Mask) };
}
PackedRegisterRef pack(RegisterRef RR) const {
return { RR.Reg, getIndexForLaneMask(RR.Mask) };
}
RegisterRef unpack(PackedRegisterRef PR) const {
return RegisterRef(PR.Reg, getLaneMaskForIndex(PR.MaskId));
}
@ -491,11 +512,8 @@ namespace rdf {
struct RegisterAggr {
RegisterAggr(const TargetRegisterInfo &tri)
: Masks(), ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false),
TRI(tri) {}
RegisterAggr(const RegisterAggr &RG)
: Masks(RG.Masks), ExpAliasUnits(RG.ExpAliasUnits),
CheckUnits(RG.CheckUnits), TRI(RG.TRI) {}
: ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false), TRI(tri) {}
RegisterAggr(const RegisterAggr &RG) = default;
bool empty() const { return Masks.empty(); }
bool hasAliasOf(RegisterRef RR) const;
@ -530,11 +548,11 @@ namespace rdf {
const TargetRegisterInfo &TRI;
};
struct NodeBase {
public:
// Make sure this is a POD.
NodeBase() = default;
uint16_t getType() const { return NodeAttrs::type(Attrs); }
uint16_t getKind() const { return NodeAttrs::kind(Attrs); }
uint16_t getFlags() const { return NodeAttrs::flags(Attrs); }
@ -596,29 +614,36 @@ namespace rdf {
struct RefNode : public NodeBase {
RefNode() = default;
RegisterRef getRegRef(const DataFlowGraph &G) const;
MachineOperand &getOp() {
assert(!(getFlags() & NodeAttrs::PhiRef));
return *Ref.Op;
}
void setRegRef(RegisterRef RR, DataFlowGraph &G);
void setRegRef(MachineOperand *Op, DataFlowGraph &G);
NodeId getReachingDef() const {
return Ref.RD;
}
void setReachingDef(NodeId RD) {
Ref.RD = RD;
}
NodeId getSibling() const {
return Ref.Sib;
}
void setSibling(NodeId Sib) {
Ref.Sib = Sib;
}
bool isUse() const {
assert(getType() == NodeAttrs::Ref);
return getKind() == NodeAttrs::Use;
}
bool isDef() const {
assert(getType() == NodeAttrs::Ref);
return getKind() == NodeAttrs::Def;
@ -702,6 +727,7 @@ namespace rdf {
MachineBasicBlock *getCode() const {
return CodeNode::getCode<MachineBasicBlock*>();
}
void addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G);
};
@ -709,6 +735,7 @@ namespace rdf {
MachineFunction *getCode() const {
return CodeNode::getCode<MachineFunction*>();
}
NodeAddr<BlockNode*> findBlock(const MachineBasicBlock *BB,
const DataFlowGraph &G) const;
NodeAddr<BlockNode*> getEntryBlock(const DataFlowGraph &G);
@ -723,6 +750,7 @@ namespace rdf {
template <typename T> T ptr(NodeId N) const {
return static_cast<T>(ptr(N));
}
NodeId id(const NodeBase *P) const;
template <typename T> NodeAddr<T> addr(NodeId N) const {
@ -738,13 +766,17 @@ namespace rdf {
struct DefStack {
DefStack() = default;
bool empty() const { return Stack.empty() || top() == bottom(); }
private:
typedef NodeAddr<DefNode*> value_type;
struct Iterator {
typedef DefStack::value_type value_type;
Iterator &up() { Pos = DS.nextUp(Pos); return *this; }
Iterator &down() { Pos = DS.nextDown(Pos); return *this; }
value_type operator*() const {
assert(Pos >= 1);
return DS.Stack[Pos-1];
@ -755,14 +787,17 @@ namespace rdf {
}
bool operator==(const Iterator &It) const { return Pos == It.Pos; }
bool operator!=(const Iterator &It) const { return Pos != It.Pos; }
private:
Iterator(const DefStack &S, bool Top);
// Pos-1 is the index in the StorageType object that corresponds to
// the top of the DefStack.
const DefStack &DS;
unsigned Pos;
friend struct DefStack;
};
public:
typedef Iterator iterator;
iterator top() const { return Iterator(*this, true); }
@ -773,14 +808,18 @@ namespace rdf {
void pop();
void start_block(NodeId N);
void clear_block(NodeId N);
private:
friend struct Iterator;
typedef std::vector<value_type> StorageType;
bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const {
return (P.Addr == nullptr) && (N == 0 || P.Id == N);
}
unsigned nextUp(unsigned P) const;
unsigned nextDown(unsigned P) const;
StorageType Stack;
};
@ -819,6 +858,7 @@ namespace rdf {
if (RemoveFromOwner)
removeFromOwner(UA);
}
void unlinkDef(NodeAddr<DefNode*> DA, bool RemoveFromOwner) {
unlinkDefDF(DA);
if (RemoveFromOwner)
@ -831,23 +871,28 @@ namespace rdf {
return BA.Addr->getType() == NodeAttrs::Ref &&
BA.Addr->getKind() == Kind;
}
template <uint16_t Kind>
static bool IsCode(const NodeAddr<NodeBase*> BA) {
return BA.Addr->getType() == NodeAttrs::Code &&
BA.Addr->getKind() == Kind;
}
static bool IsDef(const NodeAddr<NodeBase*> BA) {
return BA.Addr->getType() == NodeAttrs::Ref &&
BA.Addr->getKind() == NodeAttrs::Def;
}
static bool IsUse(const NodeAddr<NodeBase*> BA) {
return BA.Addr->getType() == NodeAttrs::Ref &&
BA.Addr->getKind() == NodeAttrs::Use;
}
static bool IsPhi(const NodeAddr<NodeBase*> BA) {
return BA.Addr->getType() == NodeAttrs::Code &&
BA.Addr->getKind() == NodeAttrs::Phi;
}
static bool IsPreservingDef(const NodeAddr<DefNode*> DA) {
uint16_t Flags = DA.Addr->getFlags();
return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef);
@ -902,6 +947,7 @@ namespace rdf {
void unlinkUseDF(NodeAddr<UseNode*> UA);
void unlinkDefDF(NodeAddr<DefNode*> DA);
void removeFromOwner(NodeAddr<RefNode*> RA) {
NodeAddr<InstrNode*> IA = RA.Addr->getOwner(*this);
IA.Addr->removeMember(RA, *this);
@ -967,7 +1013,6 @@ namespace rdf {
return MM;
}
// Optionally print the lane mask, if it is not ~0.
struct PrintLaneMaskOpt {
PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {}
@ -991,7 +1036,9 @@ namespace rdf {
PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g)
: Print<NodeAddr<T>>(x, g) {}
};
} // namespace rdf
} // namespace llvm
#endif // RDF_GRAPH_H
} // end namespace rdf
} // end namespace llvm
#endif // LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H


@ -28,6 +28,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@ -43,6 +44,11 @@ bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
return MipsDAGToDAGISel::runOnMachineFunction(MF);
}
void MipsSEDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTreeWrapperPass>();
SelectionDAGISel::getAnalysisUsage(AU);
}
void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
MachineFunction &MF) {
MachineInstrBuilder MIB(MF, &MI);


@ -28,6 +28,8 @@ private:
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
void addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
MachineFunction &MF);


@ -3981,40 +3981,46 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
static bool isFunctionGlobalAddress(SDValue Callee);
static bool
resideInSameModule(SDValue Callee, Reloc::Model RelMod) {
resideInSameSection(const Function *Caller, SDValue Callee,
const TargetMachine &TM) {
// If !G, Callee can be an external symbol.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
if (!G) return false;
const GlobalValue *GV = G->getGlobal();
if (GV->isDeclaration()) return false;
switch(GV->getLinkage()) {
default: llvm_unreachable("unknown linkage type");
case GlobalValue::AvailableExternallyLinkage:
case GlobalValue::ExternalWeakLinkage:
if (!G)
return false;
// Callee with weak linkage is allowed if it has hidden or protected
// visibility
case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage: // e.g. c++ template instantiation
if (GV->hasDefaultVisibility())
return false;
const GlobalValue *GV = G->getGlobal();
if (!GV->isStrongDefinitionForLinker())
return false;
case GlobalValue::ExternalLinkage:
case GlobalValue::InternalLinkage:
case GlobalValue::PrivateLinkage:
break;
// Any explicitly-specified sections and section prefixes must also match.
// Also, if we're using -ffunction-sections, then each function is always in
// a different section (the same is true for COMDAT functions).
if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
GV->getSection() != Caller->getSection())
return false;
if (const auto *F = dyn_cast<Function>(GV)) {
if (F->getSectionPrefix() != Caller->getSectionPrefix())
return false;
}
// With '-fPIC', calling a default-visibility function needs a 'nop' inserted
// after the call, no matter whether that function resides in the same module
// or not, so we treat it as being in a different module.
if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility())
// If the callee might be interposed, then we can't assume the ultimate call
// target will be in the same section. Even in cases where we can assume that
// interposition won't happen, in any case where the linker might insert a
// stub to allow for interposition, we must generate code as though
// interposition might occur. To understand why this matters, consider a
// situation where: a -> b -> c where the arrows indicate calls. b and c are
// in the same section, but a is in a different module (i.e. has a different
// TOC base pointer). If the linker allows for interposition between b and c,
// then it will generate a stub for the call edge between b and c which will
// save the TOC pointer into the designated stack slot allocated by b. If we
// return true here, and therefore allow a tail call between b and c, that
// stack slot won't exist and the b -> c stub will end up saving b's TOC base
// pointer into the stack slot allocated by a (where the a -> b stub saved
// a's TOC base pointer). If we're not considering a tail call, but rather,
// whether a nop is needed after the call instruction in b, because the linker
// will insert a stub, it might complain about a missing nop if we omit it
// (although many don't complain in this case).
if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
return false;
return true;
@ -4130,11 +4136,11 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
!isa<ExternalSymbolSDNode>(Callee))
return false;
// Check if Callee resides in the same module, because for now, PPC64 SVR4 ABI
// (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides in another
// module.
// Check if Callee resides in the same section, because for now, PPC64 SVR4
// ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides in
// another section.
// ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel()))
if (!resideInSameSection(MF.getFunction(), Callee, getTargetMachine()))
return false;
// TCO allows altering callee ABI, so we don't have to check further.
@ -4592,14 +4598,6 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
return CallOpc;
}
static
bool isLocalCall(const SDValue &Callee)
{
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
return G->getGlobal()->isStrongDefinitionForLinker();
return false;
}
SDValue PPCTargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@ -4701,6 +4699,7 @@ SDValue PPCTargetLowering::FinishCall(
// stack frame. If caller and callee belong to the same module (and have the
// same TOC), the NOP will remain unchanged.
MachineFunction &MF = DAG.getMachineFunction();
if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
!isPatchPoint) {
if (CallOpc == PPCISD::BCTRL) {
@ -4724,11 +4723,11 @@ SDValue PPCTargetLowering::FinishCall(
// The address needs to go after the chain input but before the flag (or
// any other variadic arguments).
Ops.insert(std::next(Ops.begin()), AddTOC);
} else if ((CallOpc == PPCISD::CALL) &&
(!isLocalCall(Callee) ||
DAG.getTarget().getRelocationModel() == Reloc::PIC_))
} else if (CallOpc == PPCISD::CALL &&
!resideInSameSection(MF.getFunction(), Callee, DAG.getTarget())) {
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
}
}
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);


@ -70,7 +70,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
EmitFunctionBody();
// Emit the XRay table for this function.
EmitXRayTable();
emitXRayTable();
// We didn't modify anything.
return false;


@ -373,6 +373,10 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr
: std::next(MBBI);
PI = skipDebugInstructionsBackward(PI, MBB.begin());
if (NI != nullptr)
NI = skipDebugInstructionsForward(NI, MBB.end());
unsigned Opc = PI->getOpcode();
int Offset = 0;
@ -2586,6 +2590,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
I = MBB.erase(I);
auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
if (!reserveCallFrame) {
// If the stack pointer can be changed after prologue, turn the
@ -2615,7 +2620,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
if (HasDwarfEHHandlers && !isDestroy &&
MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
BuildCFI(MBB, I, DL,
BuildCFI(MBB, InsertPos, DL,
MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
if (Amount == 0)
@ -2629,7 +2634,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// If this is a callee-pop calling convention, emit a CFA adjust for
// the amount the callee popped.
if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
BuildCFI(MBB, I, DL,
BuildCFI(MBB, InsertPos, DL,
MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
// Add Amount to SP to destroy a frame, or subtract to setup.
@ -2640,13 +2645,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// Merge with any previous or following adjustment instruction. Note: the
// instructions merged with here do not have CFI, so their stack
// adjustments do not feed into CfaAdjustment.
StackAdjustment += mergeSPUpdates(MBB, I, true);
StackAdjustment += mergeSPUpdates(MBB, I, false);
StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
if (StackAdjustment) {
if (!(Fn->optForMinSize() &&
adjustStackWithPops(MBB, I, DL, StackAdjustment)))
BuildStackAdjustment(MBB, I, DL, StackAdjustment,
adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
/*InEpilogue=*/false);
}
}
@ -2662,8 +2667,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// TODO: When not using precise CFA, we also need to adjust for the
// InternalAmt here.
if (CfaAdjustment) {
BuildCFI(MBB, I, DL, MCCFIInstruction::createAdjustCfaOffset(
nullptr, CfaAdjustment));
BuildCFI(MBB, InsertPos, DL,
MCCFIInstruction::createAdjustCfaOffset(nullptr,
CfaAdjustment));
}
}


@ -11474,6 +11474,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
const SmallBitVector &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SmallVector<int, 4> WidenedMask;
if (!canWidenShuffleElements(Mask, WidenedMask))
return SDValue();
// TODO: If minimizing size and one of the inputs is a zero vector and the
// zero vector has only one use, we could use a VPERM2X128 to save the
// instruction bytes needed to explicitly generate the zero vector.
@ -11521,15 +11525,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
// [6] - ignore
// [7] - zero high half of destination
int MaskLO = Mask[0];
if (MaskLO == SM_SentinelUndef)
MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1];
int MaskLO = WidenedMask[0] < 0 ? 0 : WidenedMask[0];
int MaskHI = WidenedMask[1] < 0 ? 0 : WidenedMask[1];
int MaskHI = Mask[2];
if (MaskHI == SM_SentinelUndef)
MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3];
unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4;
unsigned PermMask = MaskLO | (MaskHI << 4);
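// E.g., a WidenedMask of {0, 3} (low 128 bits from V1, high 128 bits from
// V2) yields PermMask 0x30 as the VPERM2X128 immediate.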
// If either input is a zero vector, replace it with an undef input.
// Shuffle mask values < 4 are selecting elements of V1.
@ -11538,16 +11537,16 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
// selecting the zero vector and setting the zero mask bit.
if (IsV1Zero) {
V1 = DAG.getUNDEF(VT);
if (MaskLO < 4)
if (MaskLO < 2)
PermMask = (PermMask & 0xf0) | 0x08;
if (MaskHI < 4)
if (MaskHI < 2)
PermMask = (PermMask & 0x0f) | 0x80;
}
if (IsV2Zero) {
V2 = DAG.getUNDEF(VT);
if (MaskLO >= 4)
if (MaskLO >= 2)
PermMask = (PermMask & 0xf0) | 0x08;
if (MaskHI >= 4)
if (MaskHI >= 2)
PermMask = (PermMask & 0x0f) | 0x80;
}
@ -12012,11 +12011,9 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
SmallVector<int, 4> WidenedMask;
if (canWidenShuffleElements(Mask, WidenedMask))
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return V;
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return V;
if (V2.isUndef()) {
// Check for being able to broadcast a single element.
@ -12107,11 +12104,9 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!");
SmallVector<int, 4> WidenedMask;
if (canWidenShuffleElements(Mask, WidenedMask))
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return V;
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
@ -12605,33 +12600,72 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
if (!canWidenShuffleElements(Mask, WidenedMask))
return SDValue();
SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
// Ensure elements came from the same Op.
int MaxOp1Index = VT.getVectorNumElements()/2 - 1;
for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
if (WidenedMask[i] == SM_SentinelZero)
return SDValue();
if (WidenedMask[i] == SM_SentinelUndef)
// Check for patterns which can be matched with a single insert of a 256-bit
// subvector.
bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask,
{0, 1, 2, 3, 0, 1, 2, 3});
if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask,
{0, 1, 2, 3, 8, 9, 10, 11})) {
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4);
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
DAG.getIntPtrConstant(0, DL));
SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
OnlyUsesV1 ? V1 : V2,
DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
}
assert(WidenedMask.size() == 4);
// See if this is an insertion of the lower 128 bits of V2 into V1.
bool IsInsert = true;
int V2Index = -1;
for (int i = 0; i < 4; ++i) {
assert(WidenedMask[i] >= -1);
if (WidenedMask[i] < 0)
continue;
SDValue Op = WidenedMask[i] > MaxOp1Index ? V2 : V1;
unsigned OpIndex = (i < Size/2) ? 0 : 1;
// Make sure all V1 subvectors are in place.
if (WidenedMask[i] < 4) {
if (WidenedMask[i] != i) {
IsInsert = false;
break;
}
} else {
// Make sure we only have a single V2 index and it's the lowest 128 bits.
if (V2Index >= 0 || WidenedMask[i] != 4) {
IsInsert = false;
break;
}
V2Index = i;
}
}
if (IsInsert && V2Index >= 0) {
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
DAG.getIntPtrConstant(0, DL));
return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL);
}
// Try to lower to vshuf64x2/vshuf32x4.
SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
unsigned PermMask = 0;
// Ensure elements came from the same Op.
for (int i = 0; i < 4; ++i) {
assert(WidenedMask[i] >= -1);
if (WidenedMask[i] < 0)
continue;
SDValue Op = WidenedMask[i] >= 4 ? V2 : V1;
unsigned OpIndex = i / 2;
if (Ops[OpIndex].isUndef())
Ops[OpIndex] = Op;
else if (Ops[OpIndex] != Op)
return SDValue();
}
// Form a 128-bit permutation.
// Convert the 64-bit shuffle mask selection values into 128-bit selection
// bits defined by a vshuf64x2 instruction's immediate control byte.
unsigned PermMask = 0, Imm = 0;
unsigned ControlBitsNum = WidenedMask.size() / 2;
for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
// Use first element in place of undef mask.
Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
PermMask |= (Imm % WidenedMask.size()) << (i * ControlBitsNum);
// Convert the 128-bit shuffle mask selection values into 128-bit selection
// bits defined by a vshuf64x2 instruction's immediate control byte.
PermMask |= (WidenedMask[i] % 4) << (i * 2);
}
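The loop above packs one 2-bit selector per 128-bit result lane into the vshuf64x2/vshuf32x4 control byte, with the V1-vs-V2 choice carried separately via Ops[]. A standalone sketch of just that packing (plain C++; buildShuf128Imm is a hypothetical name):

#include <cassert>

// Hypothetical helper, for illustration. Undef lanes (< 0) use selector 0,
// matching the "use first element in place of undef" behavior.
unsigned buildShuf128Imm(const int WidenedMask[4]) {
  unsigned PermMask = 0;
  for (int i = 0; i < 4; ++i)
    if (WidenedMask[i] >= 0)
      PermMask |= (WidenedMask[i] % 4) << (i * 2);
  return PermMask;
}

int main() {
  // Reversing the four 128-bit lanes of one source: {3, 2, 1, 0} -> 0x1b.
  const int Rev[4] = {3, 2, 1, 0};
  assert(buildShuf128Imm(Rev) == 0x1b);
  return 0;
}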
return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
@ -13051,10 +13085,10 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) {
int NumElements = Mask.size();
int NumV1Elements = 0, NumV2Elements = 0, NumSentinelElements = 0;
int NumV1Elements = 0, NumV2Elements = 0;
for (int M : Mask)
if (M < 0)
++NumSentinelElements;
continue;
else if (M < NumElements)
++NumV1Elements;
else
@ -18660,8 +18694,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_3OP_IMM8_MASK:
case INTR_TYPE_3OP_MASK:
case INSERT_SUBVEC: {
case INTR_TYPE_3OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
@ -18670,13 +18703,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK)
Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
else if (IntrData->Type == INSERT_SUBVEC) {
// imm should be adapted to ISD::INSERT_SUBVECTOR behavior
assert(isa<ConstantSDNode>(Src3) && "Expected a ConstantSDNode here!");
unsigned Imm = cast<ConstantSDNode>(Src3)->getZExtValue();
Imm *= Src2.getSimpleValueType().getVectorNumElements();
Src3 = DAG.getTargetConstant(Imm, dl, MVT::i32);
}
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
@ -28693,6 +28719,29 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1),
Op.getOperand(2));
}
case ISD::INSERT_SUBVECTOR: {
unsigned EltSize = EltVT.getSizeInBits();
if (EltSize != 32 && EltSize != 64)
return false;
MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
// Only change element size, not type.
if (VT.isInteger() != OpEltVT.isInteger())
return false;
uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
SDValue Op0 = DAG.getBitcast(VT, Op.getOperand(0));
DCI.AddToWorklist(Op0.getNode());
// Op1 needs to be bitcasted to a smaller vector with the same element type.
SDValue Op1 = Op.getOperand(1);
MVT Op1VT = MVT::getVectorVT(EltVT,
Op1.getSimpleValueType().getSizeInBits() / EltSize);
Op1 = DAG.getBitcast(Op1VT, Op1);
DCI.AddToWorklist(Op1.getNode());
DCI.CombineTo(OrigOp.getNode(),
DAG.getNode(Opcode, DL, VT, Op0, Op1,
DAG.getConstant(Imm, DL, MVT::i8)));
return true;
}
}
return false;
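The index arithmetic in this case follows from the insert position being measured in elements: rescaling by the ratio of element widths keeps the same byte offset across the bitcast. A worked standalone example (hypothetical helper, not LLVM code):

#include <cassert>
#include <cstdint>

// Hypothetical helper: rescale an element index after a bitcast changes the
// element width, preserving the byte offset of the inserted subvector.
uint64_t rescaleInsertIndex(uint64_t Imm, unsigned OpEltBits,
                            unsigned NewEltBits) {
  return (Imm * OpEltBits) / NewEltBits;
}

int main() {
  // Insert at element 4 of a v16i32; after bitcasting to v8i64 the same
  // byte offset is element (4 * 32) / 64 == 2.
  assert(rescaleInsertIndex(4, 32, 64) == 2);
  return 0;
}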
@ -31784,6 +31833,83 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
/// the codegen.
/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SDLoc &DL) {
assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode");
SDValue Src = N->getOperand(0);
unsigned Opcode = Src.getOpcode();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = N->getValueType(0);
EVT SrcVT = Src.getValueType();
auto IsRepeatedOpOrOneUseConstant = [](SDValue Op0, SDValue Op1) {
// TODO: Add extra cases where we can truncate both inputs for the
// cost of one (or none).
// e.g. TRUNC( BINOP( EXT( X ), EXT( Y ) ) ) --> BINOP( X, Y )
if (Op0 == Op1)
return true;
SDValue BC0 = peekThroughOneUseBitcasts(Op0);
SDValue BC1 = peekThroughOneUseBitcasts(Op1);
return ISD::isBuildVectorOfConstantSDNodes(BC0.getNode()) ||
ISD::isBuildVectorOfConstantSDNodes(BC1.getNode());
};
auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
return DAG.getNode(Opcode, DL, VT, Trunc0, Trunc1);
};
// Don't combine if the operation has other uses.
if (!N->isOnlyUserOf(Src.getNode()))
return SDValue();
// Only support vector truncation for now.
// TODO: i64 scalar math would benefit as well.
if (!VT.isVector())
return SDValue();
// In most cases it's only worth pre-truncating if we're only facing the cost
// of one truncation.
// i.e. if one of the inputs will constant fold or the input is repeated.
switch (Opcode) {
case ISD::AND:
case ISD::XOR:
case ISD::OR: {
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegalOrPromote(Opcode, VT) &&
IsRepeatedOpOrOneUseConstant(Op0, Op1))
return TruncateArithmetic(Op0, Op1);
break;
}
case ISD::MUL:
// X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - it's
// better to truncate if we have the chance.
if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) &&
!TLI.isOperationLegal(Opcode, SrcVT))
return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
LLVM_FALLTHROUGH;
case ISD::ADD: {
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegal(Opcode, VT) &&
IsRepeatedOpOrOneUseConstant(Op0, Op1))
return TruncateArithmetic(Op0, Op1);
break;
}
}
return SDValue();
}
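This combine relies on truncation commuting with these binary ops: the discarded high bits of a two's-complement AND/OR/XOR/ADD/MUL never feed into the retained low bits. A standalone sanity check of that identity in plain C++ (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0x123456789abcdef0ULL, Y = 0x0fedcba987654321ULL;
  // Bitwise ops and modular add/mul commute with truncation to 32 bits.
  assert((uint32_t)(X & Y) == ((uint32_t)X & (uint32_t)Y));
  assert((uint32_t)(X ^ Y) == ((uint32_t)X ^ (uint32_t)Y));
  assert((uint32_t)(X + Y) == (uint32_t)((uint32_t)X + (uint32_t)Y));
  assert((uint32_t)(X * Y) == (uint32_t)((uint32_t)X * (uint32_t)Y));
  return 0;
}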
/// Truncate a group of v4i32 into v16i8/v8i16 using X86ISD::PACKUS.
static SDValue
combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG,
@ -31970,6 +32096,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
SDValue Src = N->getOperand(0);
SDLoc DL(N);
// Attempt to pre-truncate inputs to arithmetic ops instead.
if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL))
return V;
// Try to detect AVG pattern first.
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;

View File

@ -650,33 +650,6 @@ multiclass vextract_for_size<int Opcode,
From.ZSuffix # "rrkz")
To.KRCWM:$mask, From.RC:$src1,
(EXTRACT_get_vextract_imm To.RC:$ext))>;
// Intrinsic call with masking.
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
"x" # To.NumElts # "_" # From.Size)
From.RC:$src1, (iPTR imm:$idx), To.RC:$src0, To.MRC:$mask),
(!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
From.ZSuffix # "rrk")
To.RC:$src0,
(COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
From.RC:$src1, imm:$idx)>;
// Intrinsic call with zero-masking.
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
"x" # To.NumElts # "_" # From.Size)
From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, To.MRC:$mask),
(!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
From.ZSuffix # "rrkz")
(COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
From.RC:$src1, imm:$idx)>;
// Intrinsic call without masking.
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
"x" # To.NumElts # "_" # From.Size)
From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
(!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
From.ZSuffix # "rr")
From.RC:$src1, imm:$idx)>;
}
// Codegen pattern for the alternative types
@ -6871,18 +6844,18 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let isCodeGenOnly = 1 in {
defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
load, "ucomiss">, PS, EVEX, VEX_LIG,
defm Int_VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
sse_load_f32, "ucomiss">, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
load, "ucomisd">, PD, EVEX,
defm Int_VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
sse_load_f64, "ucomisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
load, "comiss">, PS, EVEX, VEX_LIG,
defm Int_VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
sse_load_f32, "comiss">, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
load, "comisd">, PD, EVEX,
defm Int_VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
sse_load_f64, "comisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
}

View File

@ -2373,6 +2373,23 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
Sched<[WriteFAddLd, ReadAfterLd]>;
}
// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp
multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
ValueType vt, Operand memop,
ComplexPattern mem_cpat, string OpcodeStr> {
def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
IIC_SSE_COMIS_RR>,
Sched<[WriteFAdd]>;
def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
mem_cpat:$src2))],
IIC_SSE_COMIS_RM>,
Sched<[WriteFAddLd, ReadAfterLd]>;
}
let Defs = [EFLAGS] in {
defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss">, PS, VEX, VEX_LIG;
@ -2386,15 +2403,15 @@ let Defs = [EFLAGS] in {
}
let isCodeGenOnly = 1 in {
defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
load, "ucomiss">, PS, VEX;
defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
load, "ucomisd">, PD, VEX;
defm Int_VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
sse_load_f32, "ucomiss">, PS, VEX;
defm Int_VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
sse_load_f64, "ucomisd">, PD, VEX;
defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
load, "comiss">, PS, VEX;
defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
load, "comisd">, PD, VEX;
defm Int_VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
sse_load_f32, "comiss">, PS, VEX;
defm Int_VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
sse_load_f64, "comisd">, PD, VEX;
}
defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss">, PS;
@ -2409,15 +2426,15 @@ let Defs = [EFLAGS] in {
}
let isCodeGenOnly = 1 in {
defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
load, "ucomiss">, PS;
defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
load, "ucomisd">, PD;
defm Int_UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
sse_load_f32, "ucomiss">, PS;
defm Int_UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
sse_load_f64, "ucomisd">, PD;
defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
"comiss">, PS;
defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
"comisd">, PD;
defm Int_COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
sse_load_f32, "comiss">, PS;
defm Int_COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
sse_load_f64, "comisd">, PD;
}
} // Defs = [EFLAGS]

View File

@ -1,4 +1,4 @@
//===-- X86AVX512Info.h - X86 Instruction Tables Information ----*- C++ -*-===//
//===-- X86InstrTablesInfo.h - X86 Instruction Tables -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -25,8 +25,7 @@ struct X86EvexToVexCompressTableEntry {
// X86 EVEX encoded instructions that have a VEX 128 encoding
// (table format: <EVEX opcode, VEX-128 opcode>).
static const X86EvexToVexCompressTableEntry
X86EvexToVex128CompressTable[] = {
static const X86EvexToVexCompressTableEntry X86EvexToVex128CompressTable[] = {
// EVEX scalar with corresponding VEX.
{ X86::Int_VCOMISDZrm , X86::Int_VCOMISDrm },
{ X86::Int_VCOMISDZrr , X86::Int_VCOMISDrr },
@ -250,20 +249,20 @@ static const X86EvexToVexCompressTableEntry
{ X86::VUCOMISDZrr , X86::VUCOMISDrr },
{ X86::VUCOMISSZrm , X86::VUCOMISSrm },
{ X86::VUCOMISSZrr , X86::VUCOMISSrr },
{ X86::VMOV64toPQIZrr , X86::VMOV64toPQIrr },
{ X86::VMOV64toSDZrr , X86::VMOV64toSDrr },
{ X86::VMOVDI2PDIZrm , X86::VMOVDI2PDIrm },
{ X86::VMOVDI2PDIZrr , X86::VMOVDI2PDIrr },
{ X86::VMOVLHPSZrr , X86::VMOVLHPSrr },
{ X86::VMOVHLPSZrr , X86::VMOVHLPSrr },
{ X86::VMOVPDI2DIZmr , X86::VMOVPDI2DImr },
{ X86::VMOVPDI2DIZrr , X86::VMOVPDI2DIrr },
{ X86::VMOVPQI2QIZmr , X86::VMOVPQI2QImr },
{ X86::VMOVPQIto64Zrr , X86::VMOVPQIto64rr },
{ X86::VMOVQI2PQIZrm , X86::VMOVQI2PQIrm },
{ X86::VMOVZPQILo2PQIZrr , X86::VMOVZPQILo2PQIrr },
{ X86::VPEXTRBZmr , X86::VPEXTRBmr },
{ X86::VPEXTRBZrr , X86::VPEXTRBrr },
{ X86::VPEXTRDZmr , X86::VPEXTRDmr },
@ -272,7 +271,7 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPEXTRQZrr , X86::VPEXTRQrr },
{ X86::VPEXTRWZmr , X86::VPEXTRWmr },
{ X86::VPEXTRWZrr , X86::VPEXTRWri },
{ X86::VPINSRBZrm , X86::VPINSRBrm },
{ X86::VPINSRBZrr , X86::VPINSRBrr },
{ X86::VPINSRDZrm , X86::VPINSRDrm },
@ -294,7 +293,7 @@ static const X86EvexToVexCompressTableEntry
{ X86::VANDPDZ128rm , X86::VANDPDrm },
{ X86::VANDPDZ128rr , X86::VANDPDrr },
{ X86::VANDPSZ128rm , X86::VANDPSrm },
{ X86::VANDPSZ128rr , X86::VANDPSrr },
{ X86::VBROADCASTSSZ128m , X86::VBROADCASTSSrm },
{ X86::VBROADCASTSSZ128r , X86::VBROADCASTSSrr },
{ X86::VBROADCASTSSZ128r_s , X86::VBROADCASTSSrr },
@ -414,8 +413,8 @@ static const X86EvexToVexCompressTableEntry
{ X86::VMOVAPDZ128rm , X86::VMOVAPDrm },
{ X86::VMOVAPDZ128rr , X86::VMOVAPDrr },
{ X86::VMOVAPDZ128rr_REV , X86::VMOVAPDrr_REV },
{ X86::VMOVAPSZ128mr , X86::VMOVAPSmr },
{ X86::VMOVAPSZ128rm , X86::VMOVAPSrm },
{ X86::VMOVAPSZ128rr , X86::VMOVAPSrr },
{ X86::VMOVAPSZ128rr_REV , X86::VMOVAPSrr_REV },
{ X86::VMOVDDUPZ128rm , X86::VMOVDDUPrm },
@ -464,8 +463,8 @@ static const X86EvexToVexCompressTableEntry
{ X86::VMOVUPDZ128rm , X86::VMOVUPDrm },
{ X86::VMOVUPDZ128rr , X86::VMOVUPDrr },
{ X86::VMOVUPDZ128rr_REV , X86::VMOVUPDrr_REV },
{ X86::VMOVUPSZ128mr , X86::VMOVUPSmr },
{ X86::VMOVUPSZ128rm , X86::VMOVUPSrm },
{ X86::VMOVUPSZ128rr , X86::VMOVUPSrr },
{ X86::VMOVUPSZ128rr_REV , X86::VMOVUPSrr_REV },
{ X86::VMULPDZ128rm , X86::VMULPDrm },
@ -520,9 +519,9 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPBROADCASTBZ128r , X86::VPBROADCASTBrr },
{ X86::VPBROADCASTDZ128m , X86::VPBROADCASTDrm },
{ X86::VPBROADCASTDZ128r , X86::VPBROADCASTDrr },
{ X86::VPBROADCASTQZ128m , X86::VPBROADCASTQrm },
{ X86::VPBROADCASTQZ128r , X86::VPBROADCASTQrr },
{ X86::VPBROADCASTWZ128m , X86::VPBROADCASTWrm },
{ X86::VPBROADCASTWZ128r , X86::VPBROADCASTWrr },
{ X86::VPERMILPDZ128mi , X86::VPERMILPDmi },
{ X86::VPERMILPDZ128ri , X86::VPERMILPDri },
@ -583,7 +582,7 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPMOVZXWDZ128rm , X86::VPMOVZXWDrm },
{ X86::VPMOVZXWDZ128rr , X86::VPMOVZXWDrr },
{ X86::VPMOVZXWQZ128rm , X86::VPMOVZXWQrm },
{ X86::VPMOVZXWQZ128rr , X86::VPMOVZXWQrr },
{ X86::VPMULDQZ128rm , X86::VPMULDQrm },
{ X86::VPMULDQZ128rr , X86::VPMULDQrr },
{ X86::VPMULHRSWZ128rm , X86::VPMULHRSWrm },
@ -612,10 +611,10 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPSHUFHWZ128ri , X86::VPSHUFHWri },
{ X86::VPSHUFLWZ128mi , X86::VPSHUFLWmi },
{ X86::VPSHUFLWZ128ri , X86::VPSHUFLWri },
{ X86::VPSLLDQZ128rr , X86::VPSLLDQri },
{ X86::VPSLLDZ128ri , X86::VPSLLDri },
{ X86::VPSLLDZ128rm , X86::VPSLLDrm },
{ X86::VPSLLDZ128rr , X86::VPSLLDrr },
{ X86::VPSLLQZ128ri , X86::VPSLLQri },
{ X86::VPSLLQZ128rm , X86::VPSLLQrm },
{ X86::VPSLLQZ128rr , X86::VPSLLQrr },
@ -713,8 +712,7 @@ static const X86EvexToVexCompressTableEntry
// X86 EVEX encoded instructions that have a VEX 256 encoding
// (table format: <EVEX opcode, VEX-256 opcode>).
static const X86EvexToVexCompressTableEntry
X86EvexToVex256CompressTable[] = {
static const X86EvexToVexCompressTableEntry X86EvexToVex256CompressTable[] = {
{ X86::VADDPDZ256rm , X86::VADDPDYrm },
{ X86::VADDPDZ256rr , X86::VADDPDYrr },
{ X86::VADDPSZ256rm , X86::VADDPSYrm },
@ -727,11 +725,11 @@ static const X86EvexToVexCompressTableEntry
{ X86::VANDPDZ256rr , X86::VANDPDYrr },
{ X86::VANDPSZ256rm , X86::VANDPSYrm },
{ X86::VANDPSZ256rr , X86::VANDPSYrr },
{ X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm },
{ X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr },
{ X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr },
{ X86::VBROADCASTSSZ256m , X86::VBROADCASTSSYrm },
{ X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr },
{ X86::VBROADCASTSSZ256r_s , X86::VBROADCASTSSYrr },
{ X86::VCVTDQ2PDZ256rm , X86::VCVTDQ2PDYrm },
{ X86::VCVTDQ2PDZ256rr , X86::VCVTDQ2PDYrr },
@ -757,6 +755,14 @@ static const X86EvexToVexCompressTableEntry
{ X86::VDIVPDZ256rr , X86::VDIVPDYrr },
{ X86::VDIVPSZ256rm , X86::VDIVPSYrm },
{ X86::VDIVPSZ256rr , X86::VDIVPSYrr },
{ X86::VEXTRACTF32x4Z256mr , X86::VEXTRACTF128mr },
{ X86::VEXTRACTF64x2Z256mr , X86::VEXTRACTF128mr },
{ X86::VEXTRACTF32x4Z256rr , X86::VEXTRACTF128rr },
{ X86::VEXTRACTF64x2Z256rr , X86::VEXTRACTF128rr },
{ X86::VEXTRACTI32x4Z256mr , X86::VEXTRACTI128mr },
{ X86::VEXTRACTI64x2Z256mr , X86::VEXTRACTI128mr },
{ X86::VEXTRACTI32x4Z256rr , X86::VEXTRACTI128rr },
{ X86::VEXTRACTI64x2Z256rr , X86::VEXTRACTI128rr },
{ X86::VFMADD132PDZ256m , X86::VFMADD132PDYm },
{ X86::VFMADD132PDZ256r , X86::VFMADD132PDYr },
{ X86::VFMADD132PSZ256m , X86::VFMADD132PSYm },
@ -829,6 +835,14 @@ static const X86EvexToVexCompressTableEntry
{ X86::VFNMSUB231PDZ256r , X86::VFNMSUB231PDYr },
{ X86::VFNMSUB231PSZ256m , X86::VFNMSUB231PSYm },
{ X86::VFNMSUB231PSZ256r , X86::VFNMSUB231PSYr },
{ X86::VINSERTF32x4Z256rm , X86::VINSERTF128rm },
{ X86::VINSERTF64x2Z256rm , X86::VINSERTF128rm },
{ X86::VINSERTF32x4Z256rr , X86::VINSERTF128rr },
{ X86::VINSERTF64x2Z256rr , X86::VINSERTF128rr },
{ X86::VINSERTI32x4Z256rm , X86::VINSERTI128rm },
{ X86::VINSERTI64x2Z256rm , X86::VINSERTI128rm },
{ X86::VINSERTI32x4Z256rr , X86::VINSERTI128rr },
{ X86::VINSERTI64x2Z256rr , X86::VINSERTI128rr },
{ X86::VMAXCPDZ256rm , X86::VMAXCPDYrm },
{ X86::VMAXCPDZ256rr , X86::VMAXCPDYrr },
{ X86::VMAXCPSZ256rm , X86::VMAXCPSYrm },
@ -849,8 +863,8 @@ static const X86EvexToVexCompressTableEntry
{ X86::VMOVAPDZ256rm , X86::VMOVAPDYrm },
{ X86::VMOVAPDZ256rr , X86::VMOVAPDYrr },
{ X86::VMOVAPDZ256rr_REV , X86::VMOVAPDYrr_REV },
{ X86::VMOVAPSZ256mr , X86::VMOVAPSYmr },
{ X86::VMOVAPSZ256rm , X86::VMOVAPSYrm },
{ X86::VMOVAPSZ256rr , X86::VMOVAPSYrr },
{ X86::VMOVAPSZ256rr_REV , X86::VMOVAPSYrr_REV },
{ X86::VMOVDDUPZ256rm , X86::VMOVDDUPYrm },
@ -943,14 +957,14 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPAVGBZ256rr , X86::VPAVGBYrr },
{ X86::VPAVGWZ256rm , X86::VPAVGWYrm },
{ X86::VPAVGWZ256rr , X86::VPAVGWYrr },
{ X86::VPBROADCASTBZ256m , X86::VPBROADCASTBYrm },
{ X86::VPBROADCASTBZ256r , X86::VPBROADCASTBYrr },
{ X86::VPBROADCASTDZ256m , X86::VPBROADCASTDYrm },
{ X86::VPBROADCASTDZ256r , X86::VPBROADCASTDYrr },
{ X86::VPBROADCASTQZ256m , X86::VPBROADCASTQYrm },
{ X86::VPBROADCASTQZ256r , X86::VPBROADCASTQYrr },
{ X86::VPBROADCASTWZ256m , X86::VPBROADCASTWYrm },
{ X86::VPBROADCASTWZ256r , X86::VPBROADCASTWYrr },
{ X86::VPERMDZ256rm , X86::VPERMDYrm },
{ X86::VPERMDZ256rr , X86::VPERMDYrr },
{ X86::VPERMILPDZ256mi , X86::VPERMILPDYmi },
@ -1050,7 +1064,7 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPSLLDQZ256rr , X86::VPSLLDQYri },
{ X86::VPSLLDZ256ri , X86::VPSLLDYri },
{ X86::VPSLLDZ256rm , X86::VPSLLDYrm },
{ X86::VPSLLDZ256rr , X86::VPSLLDYrr },
{ X86::VPSLLQZ256ri , X86::VPSLLQYri },
{ X86::VPSLLQZ256rm , X86::VPSLLQYrm },
{ X86::VPSLLQZ256rr , X86::VPSLLQYrr },
@ -1060,7 +1074,7 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPSLLVQZ256rr , X86::VPSLLVQYrr },
{ X86::VPSLLWZ256ri , X86::VPSLLWYri },
{ X86::VPSLLWZ256rm , X86::VPSLLWYrm },
{ X86::VPSLLWZ256rr , X86::VPSLLWYrr },
{ X86::VPSRADZ256ri , X86::VPSRADYri },
{ X86::VPSRADZ256rm , X86::VPSRADYrm },
{ X86::VPSRADZ256rr , X86::VPSRADYrr },
@ -1072,7 +1086,7 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPSRLDQZ256rr , X86::VPSRLDQYri },
{ X86::VPSRLDZ256ri , X86::VPSRLDYri },
{ X86::VPSRLDZ256rm , X86::VPSRLDYrm },
{ X86::VPSRLDZ256rr , X86::VPSRLDYrr },
{ X86::VPSRLQZ256ri , X86::VPSRLQYri },
{ X86::VPSRLQZ256rm , X86::VPSRLQYrm },
{ X86::VPSRLQZ256rr , X86::VPSRLQYrr },
@ -1145,4 +1159,4 @@ static const X86EvexToVexCompressTableEntry
{ X86::VXORPSZ256rr , X86::VXORPSYrr },
};
#endif
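For context: the pass that consumes these tables (outside this hunk) maps an EVEX-encoded instruction to its paired VEX replacement by searching for the EVEX opcode. A hypothetical sketch of such a lookup, with placeholder opcode numbers standing in for the X86:: enumerators:

#include <cassert>

struct CompressEntry { unsigned EvexOpc, VexOpc; };

// Placeholder opcode numbers; the real tables above use X86:: enumerators.
static const CompressEntry Table[] = { {100, 10}, {200, 20}, {300, 30} };

// Return the VEX replacement opcode, or 0 if none exists.
unsigned lookupVexOpcode(unsigned EvexOpc) {
  for (const CompressEntry &E : Table)
    if (E.EvexOpc == EvexOpc)
      return E.VexOpc;
  return 0;
}

int main() {
  assert(lookupVexOpcode(200) == 20);
  assert(lookupVexOpcode(999) == 0);
  return 0;
}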

View File

@ -34,7 +34,7 @@ enum IntrinsicType : uint16_t {
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM,
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, BRCST32x2_TO_VEC,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
EXPAND_FROM_MEM, INSERT_SUBVEC,
EXPAND_FROM_MEM,
TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
};
@ -795,30 +795,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VGETMANTS, 0),
X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM,
X86ISD::VGETMANTS, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf32x4_256, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf32x4_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf32x8_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf64x2_256, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf64x2_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf64x4_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_inserti32x4_256, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_inserti32x4_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_inserti32x8_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_inserti64x2_256, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_inserti64x2_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_inserti64x4_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK,
ISD::CTLZ, 0),
X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256, INTR_TYPE_1OP_MASK,

View File

@ -1115,56 +1115,6 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLo
OutStreamer->EmitInstruction(TC, getSubtargetInfo());
}
void X86AsmPrinter::EmitXRayTable() {
if (Sleds.empty())
return;
auto PrevSection = OutStreamer->getCurrentSectionOnly();
auto Fn = MF->getFunction();
MCSection *Section = nullptr;
if (Subtarget->isTargetELF()) {
if (Fn->hasComdat()) {
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
Fn->getComdat()->getName());
} else {
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC);
}
} else if (Subtarget->isTargetMachO()) {
Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
SectionKind::getReadOnlyWithRel());
} else {
llvm_unreachable("Unsupported target");
}
// Before we switch over, we force a reference to a label inside the
// xray_instr_map section. Since EmitXRayTable() is always called just
// before the function's end, we assume that this is happening after the
// last return instruction.
//
// We then align the reference to 16 byte boundaries, which we determined
// experimentally to be beneficial to avoid causing decoder stalls.
MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true);
OutStreamer->EmitCodeAlignment(16);
OutStreamer->EmitSymbolValue(Tmp, 8, false);
OutStreamer->SwitchSection(Section);
OutStreamer->EmitLabel(Tmp);
for (const auto &Sled : Sleds) {
OutStreamer->EmitSymbolValue(Sled.Sled, 8);
OutStreamer->EmitSymbolValue(CurrentFnSym, 8);
auto Kind = static_cast<uint8_t>(Sled.Kind);
OutStreamer->EmitBytes(
StringRef(reinterpret_cast<const char *>(&Kind), 1));
OutStreamer->EmitBytes(
StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
OutStreamer->EmitZeros(14);
}
OutStreamer->SwitchSection(PrevSection);
Sleds.clear();
}
// Returns instruction preceding MBBI in MachineFunction.
// If MBBI is the first instruction of the first basic block, returns null.
static MachineBasicBlock::const_iterator

View File

@ -598,197 +598,135 @@ int X86TTIImpl::getArithmeticInstrCost(
int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
if (Kind == TTI::SK_Reverse) {
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
static const CostTblEntry AVX512VBMIShuffleTbl[] = {
{ ISD::VECTOR_SHUFFLE, MVT::v64i8, 1 }, // vpermb
{ ISD::VECTOR_SHUFFLE, MVT::v32i8, 1 } // vpermb
};
if (ST->hasVBMI())
if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl,
ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX512BWShuffleTbl[] = {
{ ISD::VECTOR_SHUFFLE, MVT::v32i16, 1 }, // vpermw
{ ISD::VECTOR_SHUFFLE, MVT::v16i16, 1 }, // vpermw
{ ISD::VECTOR_SHUFFLE, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128
// + 2*pshufb + vinserti64x4
};
if (ST->hasBWI())
if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl,
ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX512ShuffleTbl[] = {
{ ISD::VECTOR_SHUFFLE, MVT::v8f64, 1 }, // vpermpd
{ ISD::VECTOR_SHUFFLE, MVT::v16f32, 1 }, // vpermps
{ ISD::VECTOR_SHUFFLE, MVT::v8i64, 1 }, // vpermq
{ ISD::VECTOR_SHUFFLE, MVT::v16i32, 1 }, // vpermd
};
if (ST->hasAVX512())
if (const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX2ShuffleTbl[] = {
{ ISD::VECTOR_SHUFFLE, MVT::v4f64, 1 }, // vpermpd
{ ISD::VECTOR_SHUFFLE, MVT::v8f32, 1 }, // vpermps
{ ISD::VECTOR_SHUFFLE, MVT::v4i64, 1 }, // vpermq
{ ISD::VECTOR_SHUFFLE, MVT::v8i32, 1 }, // vpermd
{ ISD::VECTOR_SHUFFLE, MVT::v16i16, 2 }, // vperm2i128 + pshufb
{ ISD::VECTOR_SHUFFLE, MVT::v32i8, 2 } // vperm2i128 + pshufb
};
if (ST->hasAVX2())
if (const auto *Entry =
CostTableLookup(AVX2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX1ShuffleTbl[] = {
{ ISD::VECTOR_SHUFFLE, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
{ ISD::VECTOR_SHUFFLE, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
{ ISD::VECTOR_SHUFFLE, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
{ ISD::VECTOR_SHUFFLE, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
{ ISD::VECTOR_SHUFFLE, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
// + vinsertf128
{ ISD::VECTOR_SHUFFLE, MVT::v32i8, 4 } // vextractf128 + 2*pshufb
// + vinsertf128
};
if (ST->hasAVX())
if (const auto *Entry =
CostTableLookup(AVX1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSSE3ShuffleTbl[] = {
{ ISD::VECTOR_SHUFFLE, MVT::v8i16, 1 }, // pshufb
{ ISD::VECTOR_SHUFFLE, MVT::v16i8, 1 } // pshufb
};
if (ST->hasSSSE3())
if (const auto *Entry =
CostTableLookup(SSSE3ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSE2ShuffleTbl[] = {
{ ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 }, // shufpd
{ ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 }, // pshufd
{ ISD::VECTOR_SHUFFLE, MVT::v4i32, 1 }, // pshufd
{ ISD::VECTOR_SHUFFLE, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
{ ISD::VECTOR_SHUFFLE, MVT::v16i8, 9 } // 2*pshuflw + 2*pshufhw
// + 2*pshufd + 2*unpck + packus
};
if (ST->hasSSE2())
if (const auto *Entry =
CostTableLookup(SSE2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSE1ShuffleTbl[] = {
{ ISD::VECTOR_SHUFFLE, MVT::v4f32, 1 }, // shufps
};
if (ST->hasSSE1())
if (const auto *Entry =
CostTableLookup(SSE1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
} else if (Kind == TTI::SK_Alternate) {
if (Kind == TTI::SK_Reverse || Kind == TTI::SK_Alternate) {
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
// 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
// The backend knows how to generate a single VEX.256 version of
// instruction VPBLENDW if the target supports AVX2.
if (ST->hasAVX2() && LT.second == MVT::v16i16)
return LT.first;
static const CostTblEntry AVX512VBMIShuffleTbl[] = {
{ TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb
{ TTI::SK_Reverse, MVT::v32i8, 1 } // vpermb
};
static const CostTblEntry AVXAltShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd
{ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd
if (ST->hasVBMI())
if (const auto *Entry =
CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
{ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vblendps
{ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vblendps
static const CostTblEntry AVX512BWShuffleTbl[] = {
{ TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
{ TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
{ TTI::SK_Reverse, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128
// + 2*pshufb + vinserti64x4
};
// This shuffle is custom lowered into a sequence of:
// 2x vextractf128 , 2x vpblendw , 1x vinsertf128
{ISD::VECTOR_SHUFFLE, MVT::v16i16, 5},
if (ST->hasBWI())
if (const auto *Entry =
CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
// This shuffle is custom lowered into a long sequence of:
// 2x vextractf128 , 4x vpshufb , 2x vpor , 1x vinsertf128
{ISD::VECTOR_SHUFFLE, MVT::v32i8, 9}
static const CostTblEntry AVX512ShuffleTbl[] = {
{ TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd
{ TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps
{ TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq
{ TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd
};
if (ST->hasAVX512())
if (const auto *Entry =
CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX2ShuffleTbl[] = {
{ TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd
{ TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps
{ TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq
{ TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd
{ TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb
{ TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb
{ TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
{ TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb
};
if (ST->hasAVX2())
if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX1ShuffleTbl[] = {
{ TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
{ TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
{ TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
{ TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
{ TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
// + vinsertf128
{ TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb
// + vinsertf128
{ TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd
{ TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd
{ TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps
{ TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps
{ TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor
{ TTI::SK_Alternate, MVT::v32i8, 3 } // vpand + vpandn + vpor
};
if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVXAltShuffleTbl,
ISD::VECTOR_SHUFFLE, LT.second))
if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSE41AltShuffleTbl[] = {
// These are lowered into movsd.
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
// packed float vectors with four elements are lowered into BLENDI dag
// nodes. A v4i32/v4f32 BLENDI generates a single 'blendps'/'blendpd'.
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
// This shuffle generates a single pshufw.
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
// There is no instruction that matches a v16i8 alternate shuffle.
// The backend will expand it into the sequence 'pshufb + pshufb + or'.
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 3}
static const CostTblEntry SSE41ShuffleTbl[] = {
{ TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw
{ TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
{ TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw
{ TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps
{ TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw
{ TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb
};
if (ST->hasSSE41())
if (const auto *Entry = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE,
LT.second))
if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSSE3AltShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
static const CostTblEntry SSSE3ShuffleTbl[] = {
{ TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb
{ TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
// SSE3 doesn't have 'blendps'. The following shuffles are expanded into
// the sequence 'shufps + pshufd'
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or
{ TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por
{ TTI::SK_Alternate, MVT::v16i8, 3 } // pshufb + pshufb + por
};
if (ST->hasSSSE3())
if (const auto *Entry = CostTableLookup(SSSE3AltShuffleTbl,
ISD::VECTOR_SHUFFLE, LT.second))
if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSEAltShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
static const CostTblEntry SSE2ShuffleTbl[] = {
{ TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd
{ TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd
{ TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd
{ TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
{ TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw
// + 2*pshufd + 2*unpck + packus
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd
// This is expanded into a long sequence of four extract + four insert.
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw.
// 8 x (pinsrw + pextrw + and + movb + movzb + or)
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 48}
{ TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd
{ TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
{ TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps
{ TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por
{ TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por
};
// Fall-back (SSE3 and SSE2).
if (const auto *Entry = CostTableLookup(SSEAltShuffleTbl,
ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
if (ST->hasSSE2())
if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSE1ShuffleTbl[] = {
{ TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps
{ TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps
};
if (ST->hasSSE1())
if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
} else if (Kind == TTI::SK_PermuteTwoSrc) {
// We assume that source and destination have the same vector type.

View File

@ -1057,6 +1057,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// add(zext(xor i16 X, -32768), -32768) --> sext X
return CastInst::Create(Instruction::SExt, X, LHS->getType());
}
if (Val->isNegative() &&
match(LHS, m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C)))) &&
Val->sge(-C->sext(Val->getBitWidth()))) {
// (add (zext (add nuw X, C)), Val) -> (zext (add nuw X, C+Val))
return CastInst::Create(
Instruction::ZExt,
Builder->CreateNUWAdd(
X, Constant::getIntegerValue(X->getType(),
*C + Val->trunc(C->getBitWidth()))),
I.getType());
}
}
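The new fold is sound because a negative Val no larger in magnitude than C can be folded into the nuw add without wrapping the narrow type. A standalone check with concrete numbers (plain C++, illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  uint8_t X = 100, C = 50;   // add nuw X, C: 150 fits in i8, no wrap
  int32_t Val = -40;         // negative, and Val >= -C
  int32_t LHS = (int32_t)(uint8_t)(X + C) + Val;  // add(zext(add nuw X, C), Val)
  uint8_t NewC = (uint8_t)(C + (uint8_t)Val);     // C + trunc(Val) == 10
  int32_t RHS = (int32_t)(uint8_t)(X + NewC);     // zext(add nuw X, C + Val)
  assert(LHS == RHS && LHS == 110);
  return 0;
}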
// FIXME: Use the match above instead of dyn_cast to allow these transforms

View File

@ -1581,6 +1581,62 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return replaceInstUsesWith(*II, V);
break;
}
case Intrinsic::fma:
case Intrinsic::fmuladd: {
Value *Src0 = II->getArgOperand(0);
Value *Src1 = II->getArgOperand(1);
// Canonicalize constants into the RHS.
if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
II->setArgOperand(0, Src1);
II->setArgOperand(1, Src0);
std::swap(Src0, Src1);
}
Value *LHS = nullptr;
Value *RHS = nullptr;
// fma fneg(x), fneg(y), z -> fma x, y, z
if (match(Src0, m_FNeg(m_Value(LHS))) &&
match(Src1, m_FNeg(m_Value(RHS)))) {
CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
{LHS, RHS, II->getArgOperand(2)});
NewCall->takeName(II);
NewCall->copyFastMathFlags(II);
return replaceInstUsesWith(*II, NewCall);
}
// fma fabs(x), fabs(x), z -> fma x, x, z
if (match(Src0, m_Intrinsic<Intrinsic::fabs>(m_Value(LHS))) &&
match(Src1, m_Intrinsic<Intrinsic::fabs>(m_Value(RHS))) && LHS == RHS) {
CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
{LHS, LHS, II->getArgOperand(2)});
NewCall->takeName(II);
NewCall->copyFastMathFlags(II);
return replaceInstUsesWith(*II, NewCall);
}
// fma x, 1, z -> fadd x, z
if (match(Src1, m_FPOne())) {
Instruction *RI = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2));
RI->copyFastMathFlags(II);
return RI;
}
break;
}
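Both fma rewrites above rest on exact IEEE identities: the infinitely precise product (-x)*(-y) equals x*y, and a unit factor reduces fma to a single correctly rounded add. A standalone check via the C library fma, under the assumption that the same identities motivate the IR combines:

#include <cassert>
#include <cmath>

int main() {
  double x = 1.5, y = -2.25, z = 0.125;
  assert(std::fma(-x, -y, z) == std::fma(x, y, z));  // fma fneg(x), fneg(y), z
  assert(std::fma(x, 1.0, z) == x + z);              // fma x, 1, z -> fadd x, z
  return 0;
}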
case Intrinsic::fabs: {
Value *Cond;
Constant *LHS, *RHS;
if (match(II->getArgOperand(0),
m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
CallInst *Call0 = Builder->CreateCall(II->getCalledFunction(), {LHS});
CallInst *Call1 = Builder->CreateCall(II->getCalledFunction(), {RHS});
return SelectInst::Create(Cond, Call0, Call1);
}
break;
}
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
// Turn PPC lvx -> load if the pointer is known aligned.
@ -2669,24 +2725,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// assume( (load addr) != null ) -> add 'nonnull' metadata to load
// (if assume is valid at the load)
if (ICmpInst* ICmp = dyn_cast<ICmpInst>(IIOperand)) {
Value *LHS = ICmp->getOperand(0);
Value *RHS = ICmp->getOperand(1);
if (ICmpInst::ICMP_NE == ICmp->getPredicate() &&
isa<LoadInst>(LHS) &&
isa<Constant>(RHS) &&
RHS->getType()->isPointerTy() &&
cast<Constant>(RHS)->isNullValue()) {
LoadInst* LI = cast<LoadInst>(LHS);
if (isValidAssumeForContext(II, LI, &DT)) {
MDNode *MD = MDNode::get(II->getContext(), None);
LI->setMetadata(LLVMContext::MD_nonnull, MD);
return eraseInstFromFunction(*II);
}
}
CmpInst::Predicate Pred;
Instruction *LHS;
if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
LHS->getType()->isPointerTy() &&
isValidAssumeForContext(II, LHS, &DT)) {
MDNode *MD = MDNode::get(II->getContext(), None);
LHS->setMetadata(LLVMContext::MD_nonnull, MD);
return eraseInstFromFunction(*II);
// TODO: apply nonnull return attributes to calls and invokes
// TODO: apply range metadata for range check patterns?
}
// If there is a dominating assume with the same condition as this one,
// then this one is redundant, and should be removed.
APInt KnownZero(1, 0), KnownOne(1, 0);

View File

@ -850,20 +850,10 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// separated by a few arithmetic operations.
BasicBlock::iterator BBI(LI);
bool IsLoadCSE = false;
if (Value *AvailableVal =
FindAvailableLoadedValue(&LI, LI.getParent(), BBI,
DefMaxInstsToScan, AA, &IsLoadCSE)) {
if (IsLoadCSE) {
LoadInst *NLI = cast<LoadInst>(AvailableVal);
unsigned KnownIDs[] = {
LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
LLVMContext::MD_noalias, LLVMContext::MD_range,
LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull,
LLVMContext::MD_invariant_group, LLVMContext::MD_align,
LLVMContext::MD_dereferenceable,
LLVMContext::MD_dereferenceable_or_null};
combineMetadata(NLI, &LI, KnownIDs);
};
if (Value *AvailableVal = FindAvailableLoadedValue(
&LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) {
if (IsLoadCSE)
combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI);
return replaceInstUsesWith(
LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),

View File

@ -731,6 +731,25 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) {
unsigned ShAmt = Op1C->getZExtValue();
// Turn:
// %zext = zext i32 %V to i64
// %res = shl i64 %zext, 8
//
// Into:
// %shl = shl i32 %V, 8
// %res = zext i32 %shl to i64
//
// This is only valid if %V would have zeros shifted out.
if (auto *ZI = dyn_cast<ZExtInst>(I.getOperand(0))) {
unsigned SrcBitWidth = ZI->getSrcTy()->getScalarSizeInBits();
if (ShAmt < SrcBitWidth &&
MaskedValueIsZero(ZI->getOperand(0),
APInt::getHighBitsSet(SrcBitWidth, ShAmt), 0, &I)) {
auto *Shl = Builder->CreateShl(ZI->getOperand(0), ShAmt);
return new ZExtInst(Shl, I.getType());
}
}
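The MaskedValueIsZero guard is what makes swapping the shl and the zext legal: the transform holds exactly when the bits shifted out of the narrow type are already zero. A standalone illustration (plain C++, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t V = 0x00ABCDEF;  // top 8 bits known zero: transform applies
  assert(((uint64_t)V << 8) == (uint64_t)(V << 8));
  uint32_t W = 0xFFABCDEF;  // high bits set: the two forms differ
  assert(((uint64_t)W << 8) != (uint64_t)(W << 8));
  return 0;
}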
// If the shifted-out value is known-zero, then this is a NUW shift.
if (!I.hasNoUnsignedWrap() &&
MaskedValueIsZero(I.getOperand(0),

View File

@ -481,9 +481,9 @@ private:
bool processNode(DomTreeNode *Node);
Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
if (auto *LI = dyn_cast<LoadInst>(Inst))
return LI;
else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
if (auto *SI = dyn_cast<StoreInst>(Inst))
return SI->getValueOperand();
assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),

View File

@ -79,6 +79,7 @@ STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted");
STATISTIC(NumGVNBlocksDeleted, "Number of blocks deleted");
STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified");
STATISTIC(NumGVNPhisAllSame, "Number of PHIs whose arguments are all the same");
STATISTIC(NumGVNMaxIterations, "Maximum Number of iterations it took to converge GVN");
//===----------------------------------------------------------------------===//
// GVN Pass
@ -714,16 +715,15 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I,
// Unlike loads, we never try to eliminate stores, so we do not check if they
// are simple and avoid value numbering them.
auto *SI = cast<StoreInst>(I);
// If this store's memorydef stores the same value as the last store, the
// memory accesses are equivalent.
// Get the expression, if any, for the RHS of the MemoryDef.
MemoryAccess *StoreAccess = MSSA->getMemoryAccess(SI);
MemoryAccess *StoreRHS = lookupMemoryAccessEquiv(
cast<MemoryDef>(StoreAccess)->getDefiningAccess());
const Expression *OldStore = createStoreExpression(SI, StoreRHS, B);
// See if this store expression already has a value, and it's the same as our
// current store. FIXME: Right now, we only do this for simple stores.
// See if we are defined by a previous store expression, it already has a
// value, and it's the same value as our current store. FIXME: Right now, we
// only do this for simple stores, we should expand to cover memcpys, etc.
if (SI->isSimple()) {
// Get the expression, if any, for the RHS of the MemoryDef.
MemoryAccess *StoreRHS = lookupMemoryAccessEquiv(
cast<MemoryDef>(StoreAccess)->getDefiningAccess());
const Expression *OldStore = createStoreExpression(SI, StoreRHS, B);
CongruenceClass *CC = ExpressionToClass.lookup(OldStore);
if (CC && CC->DefiningExpr && isa<StoreExpression>(CC->DefiningExpr) &&
CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B))
@ -1092,23 +1092,16 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
if (auto *I = dyn_cast<Instruction>(V)) {
if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
// If this is a MemoryDef, we need to update the equivalence table. If
// we
// determined the expression is congruent to a different memory state,
// use that different memory state. If we determined it didn't, we
// update
// that as well. Note that currently, we do not guarantee the
// "different" memory state dominates us. The goal is to make things
// that are congruent look congruent, not ensure we can eliminate one in
// favor of the other.
// Right now, the only way they can be equivalent is for store
// expresions.
if (!isa<MemoryUse>(MA)) {
if (E && isa<StoreExpression>(E) && EClass->Members.size() != 1) {
auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess();
setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr);
} else {
setMemoryAccessEquivTo(MA, nullptr);
}
// we determined the expression is congruent to a different memory
// state, use that different memory state. If we determined it didn't,
// we update that as well. Right now, we only support store
// expressions.
if (!isa<MemoryUse>(MA) && isa<StoreExpression>(E) &&
EClass->Members.size() != 1) {
auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess();
setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr);
} else {
setMemoryAccessEquivTo(MA, nullptr);
}
markMemoryUsersTouched(MA);
}
@ -1391,7 +1384,7 @@ void NewGVN::valueNumberInstruction(Instruction *I) {
} else {
// Handle terminators that return values. All of them produce values we
// don't currently understand.
if (!I->getType()->isVoidTy()){
if (!I->getType()->isVoidTy()) {
auto *Symbolized = createUnknownExpression(I);
performCongruenceFinding(I, Symbolized);
}
@ -1427,14 +1420,12 @@ void NewGVN::verifyMemoryCongruency() {
continue;
if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) {
auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second);
if (FirstMUD && SecondMUD) {
auto *FirstInst = FirstMUD->getMemoryInst();
auto *SecondInst = SecondMUD->getMemoryInst();
if (FirstMUD && SecondMUD)
assert(
ValueToClass.lookup(FirstInst) == ValueToClass.lookup(SecondInst) &&
ValueToClass.lookup(FirstMUD->getMemoryInst()) ==
ValueToClass.lookup(SecondMUD->getMemoryInst()) &&
"The instructions for these memory operations should have been in "
"the same congruence class");
}
} else if (auto *FirstMP = dyn_cast<MemoryPhi>(KV.first)) {
// We can only sanely verify that MemoryDefs in the operand list all have
@ -1538,9 +1529,11 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
initializeCongruenceClasses(F);
unsigned int Iterations = 0;
// We start out in the entry block.
BasicBlock *LastBlock = &F.getEntryBlock();
while (TouchedInstructions.any()) {
++Iterations;
// Walk through all the instructions in all the blocks in RPO.
for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1;
InstrNum = TouchedInstructions.find_next(InstrNum)) {
@ -1587,8 +1580,7 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
TouchedInstructions.reset(InstrNum);
}
}
// FIXME: Move this to expensive checks when we are satisfied with NewGVN
NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations);
#ifndef NDEBUG
verifyMemoryCongruency();
#endif
@ -2070,7 +2062,7 @@ bool NewGVN::eliminateInstructions(Function &F) {
// Cleanup the congruence class.
SmallPtrSet<Value *, 4> MembersLeft;
for (Value * Member : CC->Members) {
for (Value *Member : CC->Members) {
if (Member->getType()->isVoidTy()) {
MembersLeft.insert(Member);
continue;

View File

@ -760,7 +760,7 @@ static void PropagateParallelLoopAccessMetadata(CallSite CS,
/// When inlining a function that contains noalias scope metadata,
/// this metadata needs to be cloned so that the inlined blocks
/// have different "unqiue scopes" at every call site. Were this not done, then
/// have different "unique scopes" at every call site. Were this not done, then
/// aliasing scopes from a function inlined into a caller multiple times could
/// not be differentiated (and this would lead to miscompiles because the
/// non-aliasing property communicated by the metadata could have

View File

@ -335,10 +335,12 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
uint64_t TrueWeight, FalseWeight;
uint64_t ExitWeight = 0, BackEdgeWeight = 0;
uint64_t ExitWeight = 0, CurHeaderWeight = 0;
if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
BackEdgeWeight = HeaderIdx ? FalseWeight : TrueWeight;
// The # of times the loop body executes is the sum of the exit block
// weight and the # of times the backedges are taken.
CurHeaderWeight = TrueWeight + FalseWeight;
}
// For each peeled-off iteration, make a copy of the loop.
@ -346,15 +348,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
SmallVector<BasicBlock *, 8> NewBlocks;
ValueToValueMapTy VMap;
// The exit weight of the previous iteration is the header entry weight
// of the current iteration. So this is exactly how many dynamic iterations
// the current peeled-off static iteration uses up.
// Subtract the exit weight from the current header weight -- the exit
// weight is exactly the weight of the previous iteration's header.
// FIXME: due to the way the distribution is constructed, we need a
// guard here to make sure we don't end up with non-positive weights.
if (ExitWeight < BackEdgeWeight)
BackEdgeWeight -= ExitWeight;
if (ExitWeight < CurHeaderWeight)
CurHeaderWeight -= ExitWeight;
else
BackEdgeWeight = 1;
CurHeaderWeight = 1;
cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit,
NewBlocks, LoopBlocks, VMap, LVMap, LI);
@ -388,6 +389,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// Adjust the branch weights on the loop exit.
if (ExitWeight) {
// The backedge count is the difference of current header weight and
// current loop exit weight. If the current header weight is smaller than
// the current loop exit weight, we mark the loop backedge weight as 1.
uint64_t BackEdgeWeight = 0;
if (ExitWeight < CurHeaderWeight)
BackEdgeWeight = CurHeaderWeight - ExitWeight;
else
BackEdgeWeight = 1;
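This bookkeeping pairs with the per-iteration subtraction earlier in the function: each peeled copy retires ExitWeight from the header weight, and the residual loop's back edge gets whatever remains beyond the exit weight. A worked example with illustrative profile numbers (plain C++, not part of the patch):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t ExitWeight = 10, CurHeaderWeight = 1000;
  // Two peeled iterations each retire one exit weight's worth of profile
  // weight (mirroring the subtraction in the peeling loop).
  for (unsigned Iter = 0; Iter < 2; ++Iter)
    CurHeaderWeight =
        ExitWeight < CurHeaderWeight ? CurHeaderWeight - ExitWeight : 1;
  // The residual loop's back edge gets what remains beyond the exit weight.
  uint64_t BackEdgeWeight =
      ExitWeight < CurHeaderWeight ? CurHeaderWeight - ExitWeight : 1;
  std::printf("header=%llu backedge=%llu\n",
              (unsigned long long)CurHeaderWeight,
              (unsigned long long)BackEdgeWeight);  // header=980 backedge=970
  return 0;
}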
MDBuilder MDB(LatchBR->getContext());
MDNode *WeightNode =
HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)

View File

@ -1574,12 +1574,20 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
I0->getOperandUse(O).set(NewOperands[O]);
I0->moveBefore(&*BBEnd->getFirstInsertionPt());
// Update metadata and IR flags.
// The debug location for the "common" instruction is the merged location of
// all the commoned instructions. We start with the original location of the
// "common" instruction and iteratively merge each location in the loop below.
DILocation *Loc = I0->getDebugLoc();
// Update metadata and IR flags, and merge debug locations.
for (auto *I : Insts)
if (I != I0) {
Loc = DILocation::getMergedLocation(Loc, I->getDebugLoc());
combineMetadataForCSE(I0, I);
I0->andIRFlags(I);
}
if (!isa<CallInst>(I0))
I0->setDebugLoc(Loc);
if (!isa<StoreInst>(I0)) {
// canSinkLastInstruction checked that all instructions were used by

View File

@ -2366,7 +2366,11 @@ enum CXCursorKind {
*/
CXCursor_OMPTargetTeamsDistributeParallelForDirective = 277,
CXCursor_LastStmt = CXCursor_OMPTargetTeamsDistributeParallelForDirective,
/** \brief OpenMP target teams distribute parallel for simd directive.
*/
CXCursor_OMPTargetTeamsDistributeParallelForSimdDirective = 278,
CXCursor_LastStmt = CXCursor_OMPTargetTeamsDistributeParallelForSimdDirective,
/**
* \brief Cursor that represents the translation unit itself.

View File

@ -2669,6 +2669,9 @@ DEF_TRAVERSE_STMT(OMPTargetTeamsDistributeDirective,
DEF_TRAVERSE_STMT(OMPTargetTeamsDistributeParallelForDirective,
{ TRY_TO(TraverseOMPExecutableDirective(S)); })
DEF_TRAVERSE_STMT(OMPTargetTeamsDistributeParallelForSimdDirective,
{ TRY_TO(TraverseOMPExecutableDirective(S)); })
// OpenMP clauses.
template <typename Derived>
bool RecursiveASTVisitor<Derived>::TraverseOMPClause(OMPClause *C) {

View File

@ -3638,6 +3638,79 @@ public:
}
};
/// This represents '#pragma omp target teams distribute parallel for simd'
/// combined directive.
///
/// \code
/// #pragma omp target teams distribute parallel for simd private(x)
/// \endcode
/// In this example directive '#pragma omp target teams distribute parallel
/// for simd' has clause 'private' with the variables 'x'
///
class OMPTargetTeamsDistributeParallelForSimdDirective final
: public OMPLoopDirective {
friend class ASTStmtReader;
/// Build directive with the given start and end location.
///
/// \param StartLoc Starting location of the directive kind.
/// \param EndLoc Ending location of the directive.
/// \param CollapsedNum Number of collapsed nested loops.
/// \param NumClauses Number of clauses.
///
OMPTargetTeamsDistributeParallelForSimdDirective(SourceLocation StartLoc,
SourceLocation EndLoc,
unsigned CollapsedNum,
unsigned NumClauses)
: OMPLoopDirective(this,
OMPTargetTeamsDistributeParallelForSimdDirectiveClass,
OMPD_target_teams_distribute_parallel_for_simd,
StartLoc, EndLoc, CollapsedNum, NumClauses) {}
/// Build an empty directive.
///
/// \param CollapsedNum Number of collapsed nested loops.
/// \param NumClauses Number of clauses.
///
explicit OMPTargetTeamsDistributeParallelForSimdDirective(
unsigned CollapsedNum, unsigned NumClauses)
: OMPLoopDirective(
this, OMPTargetTeamsDistributeParallelForSimdDirectiveClass,
OMPD_target_teams_distribute_parallel_for_simd, SourceLocation(),
SourceLocation(), CollapsedNum, NumClauses) {}
public:
/// Creates directive with a list of \a Clauses.
///
/// \param C AST context.
/// \param StartLoc Starting location of the directive kind.
/// \param EndLoc Ending Location of the directive.
/// \param CollapsedNum Number of collapsed loops.
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
///
static OMPTargetTeamsDistributeParallelForSimdDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt, const HelperExprs &Exprs);
/// Creates an empty directive with the place for \a NumClauses clauses.
///
/// \param C AST context.
/// \param CollapsedNum Number of collapsed nested loops.
/// \param NumClauses Number of clauses.
///
static OMPTargetTeamsDistributeParallelForSimdDirective *
CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum,
EmptyShell);
static bool classof(const Stmt *T) {
return T->getStmtClass() ==
OMPTargetTeamsDistributeParallelForSimdDirectiveClass;
}
};
} // end namespace clang
#endif
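For reference, a minimal source-level use of the combined construct this class models; the file name and the -fopenmp invocation are assumptions, and any OpenMP 4.5-capable compiler should accept it:

#include <cstdio>

int main() {
  int x = 0;
  double a[128];
  // Offload to a device, create a league of teams, distribute the loop
  // across teams, parallelize within each team, and vectorize with simd;
  // 'private(x)' gives each iteration its own copy of 'x'.
  #pragma omp target teams distribute parallel for simd private(x)
  for (int i = 0; i < 128; ++i) {
    x = i % 7;
    a[i] = x * 2.0;
  }
  std::printf("%f\n", a[127]); // 2.0: the last iteration stored x == 1
  return 0;
}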

View File

@@ -3921,6 +3921,8 @@ def ext_ms_deref_template_argument: ExtWarn<
def ext_ms_delayed_template_argument: ExtWarn<
"using the undeclared type %0 as a default template argument is a "
"Microsoft extension">, InGroup<MicrosoftTemplate>;
def err_template_arg_deduced_incomplete_pack : Error<
"deduced incomplete pack %0 for template parameter %1">;
// C++ template specialization
def err_template_spec_unknown_kind : Error<

View File

@@ -162,6 +162,9 @@
#ifndef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE
#define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(Name)
#endif
#ifndef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE
#define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(Name)
#endif
// OpenMP directives.
OPENMP_DIRECTIVE(threadprivate)
@@ -214,6 +217,7 @@ OPENMP_DIRECTIVE_EXT(teams_distribute_parallel_for, "teams distribute parallel f
OPENMP_DIRECTIVE_EXT(target_teams, "target teams")
OPENMP_DIRECTIVE_EXT(target_teams_distribute, "target teams distribute")
OPENMP_DIRECTIVE_EXT(target_teams_distribute_parallel_for, "target teams distribute parallel for")
OPENMP_DIRECTIVE_EXT(target_teams_distribute_parallel_for_simd, "target teams distribute parallel for simd")
// OpenMP clauses.
OPENMP_CLAUSE(if, OMPIfClause)
@@ -793,6 +797,33 @@ OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(proc_bind)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(schedule)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(linear)
// Clauses allowed for OpenMP directive
// 'target teams distribute parallel for simd'.
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(if)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(device)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(map)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(private)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(nowait)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(depend)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(defaultmap)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(firstprivate)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(is_device_ptr)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(default)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(shared)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(reduction)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(num_teams)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(thread_limit)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(lastprivate)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(collapse)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(dist_schedule)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(num_threads)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(proc_bind)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(schedule)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(linear)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(aligned)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(safelen)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(simdlen)
#undef OPENMP_TASKLOOP_SIMD_CLAUSE
#undef OPENMP_TASKLOOP_CLAUSE
#undef OPENMP_LINEAR_KIND
@@ -843,3 +874,4 @@ OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(linear)
#undef OPENMP_TARGET_TEAMS_CLAUSE
#undef OPENMP_TARGET_TEAMS_DISTRIBUTE_CLAUSE
#undef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE
#undef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE
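The lists above follow the .def X-macro convention: an includer defines OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(Name), includes OpenMPKinds.def, and receives one expansion per allowed clause (isAllowedClauseForDirective further down is generated exactly this way). A self-contained sketch of the same pattern, with hypothetical names:

#include <cstdio>

// In LLVM the list lives in a separate .def file that each includer pulls in
// after defining the macro; here it is itself a macro so the sketch stays in
// one file.
#define FOR_EACH_SIMD_CLAUSE(CLAUSE) \
  CLAUSE(if)                         \
  CLAUSE(private)                    \
  CLAUSE(simdlen)

// One expansion: a table of the allowed clause names.
#define AS_STRING(Name) #Name,
static const char *const AllowedClauses[] = {FOR_EACH_SIMD_CLAUSE(AS_STRING)};
#undef AS_STRING

int main() {
  for (const char *Clause : AllowedClauses)
    std::puts(Clause);
  return 0;
}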

View File

@@ -243,3 +243,4 @@ def OMPTeamsDistributeParallelForDirective : DStmt<OMPLoopDirective>;
def OMPTargetTeamsDirective : DStmt<OMPExecutableDirective>;
def OMPTargetTeamsDistributeDirective : DStmt<OMPLoopDirective>;
def OMPTargetTeamsDistributeParallelForDirective : DStmt<OMPLoopDirective>;
def OMPTargetTeamsDistributeParallelForSimdDirective : DStmt<OMPLoopDirective>;

View File

@@ -882,7 +882,7 @@ def fms_compatibility_version
"(default))">;
def fdelayed_template_parsing : Flag<["-"], "fdelayed-template-parsing">, Group<f_Group>,
HelpText<"Parse templated function definitions at the end of the "
"translation unit">, Flags<[CC1Option]>;
"translation unit">, Flags<[CC1Option, CoreOption]>;
def fms_memptr_rep_EQ : Joined<["-"], "fms-memptr-rep=">, Group<f_Group>, Flags<[CC1Option]>;
def fmodules_cache_path : Joined<["-"], "fmodules-cache-path=">, Group<i_Group>,
Flags<[DriverOption, CC1Option]>, MetaVarName<"<directory>">,
@@ -1031,7 +1031,8 @@ def fno_ms_extensions : Flag<["-"], "fno-ms-extensions">, Group<f_Group>,
Flags<[CoreOption]>;
def fno_ms_compatibility : Flag<["-"], "fno-ms-compatibility">, Group<f_Group>,
Flags<[CoreOption]>;
def fno_delayed_template_parsing : Flag<["-"], "fno-delayed-template-parsing">, Group<f_Group>;
def fno_delayed_template_parsing : Flag<["-"], "fno-delayed-template-parsing">, Group<f_Group>,
Flags<[DriverOption, CoreOption]>;
def fno_objc_exceptions: Flag<["-"], "fno-objc-exceptions">, Group<f_Group>;
def fno_objc_legacy_dispatch : Flag<["-"], "fno-objc-legacy-dispatch">, Group<f_Group>;
def fno_objc_weak : Flag<["-"], "fno-objc-weak">, Group<f_Group>, Flags<[CC1Option]>;
@@ -1331,6 +1332,12 @@ def funique_section_names : Flag <["-"], "funique-section-names">,
def fno_unique_section_names : Flag <["-"], "fno-unique-section-names">,
Group<f_Group>, Flags<[CC1Option]>;
def fstrict_return : Flag<["-"], "fstrict-return">, Group<f_Group>,
Flags<[CC1Option]>,
HelpText<"Always treat control flow paths that fall off the end of a non-void"
"function as unreachable">;
def fno_strict_return : Flag<["-"], "fno-strict-return">, Group<f_Group>,
Flags<[CC1Option]>;
def fdebug_types_section: Flag <["-"], "fdebug-types-section">, Group<f_Group>,
Flags<[CC1Option]>, HelpText<"Place debug types in their own section (ELF Only)">;

View File

@@ -251,6 +251,10 @@ CODEGENOPT(DiagnosticsWithHotness, 1, 0)
/// Whether copy relocations support is available when building as PIE.
CODEGENOPT(PIECopyRelocations, 1, 0)
/// Whether we should use the undefined behavior optimization for control flow
/// paths that reach the end of a function without executing a required return.
CODEGENOPT(StrictReturn, 1, 1)
#undef CODEGENOPT
#undef ENUM_CODEGENOPT
#undef VALUE_CODEGENOPT

View File

@@ -8514,6 +8514,12 @@ public:
ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
SourceLocation EndLoc,
llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA);
/// Called on well-formed '\#pragma omp target teams distribute parallel for
/// simd' after parsing of the associated statement.
StmtResult ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective(
ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
SourceLocation EndLoc,
llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA);
/// Checks correctness of linear modifiers.
bool CheckOpenMPLinearModifier(OpenMPLinearClauseKind LinKind,

View File

@@ -1516,6 +1516,7 @@ namespace clang {
STMT_OMP_TARGET_TEAMS_DIRECTIVE,
STMT_OMP_TARGET_TEAMS_DISTRIBUTE_DIRECTIVE,
STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE,
STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_DIRECTIVE,
EXPR_OMP_ARRAY_SECTION,
// ARC

View File

@@ -0,0 +1,100 @@
//===--- Diagnostic.h - Framework for clang diagnostics tools --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// \file
// Structures supporting diagnostics and refactorings that span multiple
// translation units. They indicate diagnostic reports and replacement
// suggestions for the analyzed sources.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLING_CORE_DIAGNOSTIC_H
#define LLVM_CLANG_TOOLING_CORE_DIAGNOSTIC_H
#include "Replacement.h"
#include "clang/Basic/Diagnostic.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include <string>
namespace clang {
namespace tooling {
/// \brief Represents a diagnostic message together with information on the
/// location of the problem.
struct DiagnosticMessage {
DiagnosticMessage(llvm::StringRef Message = "");
/// \brief Constructs a diagnostic message with an offset to the diagnostic
/// within the file where the problem occurred.
///
/// \param Loc Should be a file location, it is not meaningful for a macro
/// location.
///
DiagnosticMessage(llvm::StringRef Message, const SourceManager &Sources,
SourceLocation Loc);
std::string Message;
std::string FilePath;
unsigned FileOffset;
};
/// \brief Represents the diagnostic with the level of severity and possible
/// fixes to be applied.
struct Diagnostic {
enum Level {
Warning = DiagnosticsEngine::Warning,
Error = DiagnosticsEngine::Error
};
Diagnostic() = default;
Diagnostic(llvm::StringRef DiagnosticName, Level DiagLevel,
StringRef BuildDirectory);
Diagnostic(llvm::StringRef DiagnosticName, DiagnosticMessage &Message,
llvm::StringMap<Replacements> &Fix,
SmallVector<DiagnosticMessage, 1> &Notes, Level DiagLevel,
llvm::StringRef BuildDirectory);
/// \brief Name identifying the Diagnostic.
std::string DiagnosticName;
/// \brief Message associated with the diagnostic.
DiagnosticMessage Message;
/// \brief Fixes to apply, grouped by file path.
llvm::StringMap<Replacements> Fix;
/// \brief Potential notes about the diagnostic.
SmallVector<DiagnosticMessage, 1> Notes;
/// \brief Diagnostic level. Can indicate either an error or a warning.
Level DiagLevel;
/// \brief A build directory of the diagnostic source file.
///
/// It's an absolute path, which is the `directory` field of the source file
/// in the compilation database. If users don't specify the compilation
/// database directory, it is the current directory where clang-tidy runs.
///
/// Note: it is empty in unittests.
std::string BuildDirectory;
};
/// \brief Collection of Diagnostics generated from a single translation unit.
struct TranslationUnitDiagnostics {
/// Name of the main source for the translation unit.
std::string MainSourceFile;
std::vector<Diagnostic> Diagnostics;
};
} // end namespace tooling
} // end namespace clang
#endif // LLVM_CLANG_TOOLING_CORE_DIAGNOSTIC_H

View File

@@ -329,12 +329,6 @@ llvm::Expected<std::string> applyAllReplacements(StringRef Code,
struct TranslationUnitReplacements {
/// Name of the main source for the translation unit.
std::string MainSourceFile;
/// A freeform chunk of text to describe the context of the replacements.
/// Will be printed, for example, when detecting conflicts during replacement
/// deduplication.
std::string Context;
std::vector<Replacement> Replacements;
};

View File

@@ -0,0 +1,101 @@
//===-- DiagnosticsYaml.h -- Serialization for Diagnostics -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file defines the structure of a YAML document for serializing
/// diagnostics.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_TOOLING_DIAGNOSTICSYAML_H
#define LLVM_CLANG_TOOLING_DIAGNOSTICSYAML_H
#include "clang/Tooling/Core/Diagnostic.h"
#include "clang/Tooling/ReplacementsYaml.h"
#include "llvm/Support/YAMLTraits.h"
#include <string>
LLVM_YAML_IS_SEQUENCE_VECTOR(clang::tooling::Diagnostic)
namespace llvm {
namespace yaml {
template <> struct MappingTraits<clang::tooling::Diagnostic> {
/// \brief Helper to (de)serialize a Diagnostic since we don't have direct
/// access to its data members.
class NormalizedDiagnostic {
public:
NormalizedDiagnostic(const IO &)
: DiagLevel(clang::tooling::Diagnostic::Level::Warning) {}
NormalizedDiagnostic(const IO &, const clang::tooling::Diagnostic &D)
: DiagnosticName(D.DiagnosticName), Message(D.Message), Fix(D.Fix),
Notes(D.Notes), DiagLevel(D.DiagLevel),
BuildDirectory(D.BuildDirectory) {}
clang::tooling::Diagnostic denormalize(const IO &) {
return clang::tooling::Diagnostic(DiagnosticName, Message, Fix, Notes,
DiagLevel, BuildDirectory);
}
std::string DiagnosticName;
clang::tooling::DiagnosticMessage Message;
llvm::StringMap<clang::tooling::Replacements> Fix;
SmallVector<clang::tooling::DiagnosticMessage, 1> Notes;
clang::tooling::Diagnostic::Level DiagLevel;
std::string BuildDirectory;
};
static void mapping(IO &Io, clang::tooling::Diagnostic &D) {
MappingNormalization<NormalizedDiagnostic, clang::tooling::Diagnostic> Keys(
Io, D);
Io.mapRequired("DiagnosticName", Keys->DiagnosticName);
// FIXME: Export properly all the different fields.
std::vector<clang::tooling::Replacement> Fixes;
for (auto &Replacements : Keys->Fix) {
for (auto &Replacement : Replacements.second) {
Fixes.push_back(Replacement);
}
}
Io.mapRequired("Replacements", Fixes);
for (auto &Fix : Fixes) {
llvm::Error Err = Keys->Fix[Fix.getFilePath()].add(Fix);
if (Err) {
// FIXME: Implement better conflict handling.
llvm::errs() << "Fix conflicts with existing fix: "
<< llvm::toString(std::move(Err)) << "\n";
}
}
}
};
/// \brief Specialized MappingTraits to describe how a
/// TranslationUnitDiagnostics is (de)serialized.
template <> struct MappingTraits<clang::tooling::TranslationUnitDiagnostics> {
static void mapping(IO &Io, clang::tooling::TranslationUnitDiagnostics &Doc) {
Io.mapRequired("MainSourceFile", Doc.MainSourceFile);
std::vector<clang::tooling::Diagnostic> Diagnostics;
for (auto &Diagnostic : Doc.Diagnostics) {
// FIXME: Export all diagnostics, not just the ones with fixes.
// Update MappingTraits<clang::tooling::Diagnostic>::mapping.
if (Diagnostic.Fix.size() > 0) {
Diagnostics.push_back(Diagnostic);
}
}
Io.mapRequired("Diagnostics", Diagnostics);
Doc.Diagnostics = Diagnostics;
}
};
} // end namespace yaml
} // end namespace llvm
#endif // LLVM_CLANG_TOOLING_DIAGNOSTICSYAML_H
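A sketch of how these traits are driven; the check name, file name, and offsets below are invented for illustration:

#include "clang/Tooling/Core/Diagnostic.h"
#include "clang/Tooling/DiagnosticsYaml.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  using namespace clang::tooling;
  DiagnosticMessage Msg("use nullptr");
  llvm::StringMap<Replacements> Fix;
  // Hypothetical fix: replace 2 characters at offset 10 of input.cpp.
  llvm::consumeError(
      Fix["input.cpp"].add(Replacement("input.cpp", 10, 2, "nullptr")));
  llvm::SmallVector<DiagnosticMessage, 1> Notes;
  Diagnostic D("modernize-use-nullptr", Msg, Fix, Notes, Diagnostic::Warning,
               /*BuildDirectory=*/"");

  TranslationUnitDiagnostics TUD;
  TUD.MainSourceFile = "input.cpp";
  TUD.Diagnostics.push_back(D);

  // Emits MainSourceFile plus, per diagnostic, DiagnosticName and the
  // flattened Replacements, as defined by the mapping above.
  llvm::yaml::Output YAML(llvm::outs());
  YAML << TUD;
  return 0;
}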

View File

@@ -65,7 +65,6 @@ template <> struct MappingTraits<clang::tooling::TranslationUnitReplacements> {
static void mapping(IO &Io,
clang::tooling::TranslationUnitReplacements &Doc) {
Io.mapRequired("MainSourceFile", Doc.MainSourceFile);
Io.mapOptional("Context", Doc.Context, std::string());
Io.mapRequired("Replacements", Doc.Replacements);
}
};

View File

@@ -7192,6 +7192,12 @@ static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc,
CharUnits &EndOffset) {
bool DetermineForCompleteObject = refersToCompleteObject(LVal);
auto CheckedHandleSizeof = [&](QualType Ty, CharUnits &Result) {
if (Ty.isNull() || Ty->isIncompleteType() || Ty->isFunctionType())
return false;
return HandleSizeof(Info, ExprLoc, Ty, Result);
};
// We want to evaluate the size of the entire object. This is a valid fallback
// for when Type=1 and the designator is invalid, because we're asked for an
// upper-bound.
@@ -7209,7 +7215,7 @@ static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc,
return false;
QualType BaseTy = getObjectType(LVal.getLValueBase());
return !BaseTy.isNull() && HandleSizeof(Info, ExprLoc, BaseTy, EndOffset);
return CheckedHandleSizeof(BaseTy, EndOffset);
}
// We want to evaluate the size of a subobject.
@@ -7238,7 +7244,7 @@ static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc,
}
CharUnits BytesPerElem;
if (!HandleSizeof(Info, ExprLoc, Designator.MostDerivedType, BytesPerElem))
if (!CheckedHandleSizeof(Designator.MostDerivedType, BytesPerElem))
return false;
// According to the GCC documentation, we want the size of the subobject
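The new guard matters for __builtin_object_size queries where the object's type has no size; a minimal sketch of such an input (hypothetical):

#include <cstdio>

struct Incomplete; // declared, never defined
Incomplete *P = nullptr;

int main() {
  // *P has no known size, so constant evaluation must bail out instead of
  // calling HandleSizeof on an incomplete type; the builtin then yields its
  // "unknown" result, (size_t)-1 for type 1.
  std::printf("%zu\n", __builtin_object_size(P, 1));
  return 0;
}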

View File

@@ -1659,3 +1659,64 @@ OMPTargetTeamsDistributeParallelForDirective::CreateEmpty(const ASTContext &C,
return new (Mem)
OMPTargetTeamsDistributeParallelForDirective(CollapsedNum, NumClauses);
}
OMPTargetTeamsDistributeParallelForSimdDirective *
OMPTargetTeamsDistributeParallelForSimdDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
const HelperExprs &Exprs) {
auto Size =
llvm::alignTo(sizeof(OMPTargetTeamsDistributeParallelForSimdDirective),
alignof(OMPClause *));
void *Mem = C.Allocate(
Size + sizeof(OMPClause *) * Clauses.size() +
sizeof(Stmt *) *
numLoopChildren(CollapsedNum,
OMPD_target_teams_distribute_parallel_for_simd));
OMPTargetTeamsDistributeParallelForSimdDirective *Dir =
new (Mem) OMPTargetTeamsDistributeParallelForSimdDirective(
StartLoc, EndLoc, CollapsedNum, Clauses.size());
Dir->setClauses(Clauses);
Dir->setAssociatedStmt(AssociatedStmt);
Dir->setIterationVariable(Exprs.IterationVarRef);
Dir->setLastIteration(Exprs.LastIteration);
Dir->setCalcLastIteration(Exprs.CalcLastIteration);
Dir->setPreCond(Exprs.PreCond);
Dir->setCond(Exprs.Cond);
Dir->setInit(Exprs.Init);
Dir->setInc(Exprs.Inc);
Dir->setIsLastIterVariable(Exprs.IL);
Dir->setLowerBoundVariable(Exprs.LB);
Dir->setUpperBoundVariable(Exprs.UB);
Dir->setStrideVariable(Exprs.ST);
Dir->setEnsureUpperBound(Exprs.EUB);
Dir->setNextLowerBound(Exprs.NLB);
Dir->setNextUpperBound(Exprs.NUB);
Dir->setNumIterations(Exprs.NumIterations);
Dir->setPrevLowerBoundVariable(Exprs.PrevLB);
Dir->setPrevUpperBoundVariable(Exprs.PrevUB);
Dir->setCounters(Exprs.Counters);
Dir->setPrivateCounters(Exprs.PrivateCounters);
Dir->setInits(Exprs.Inits);
Dir->setUpdates(Exprs.Updates);
Dir->setFinals(Exprs.Finals);
Dir->setPreInits(Exprs.PreInits);
return Dir;
}
OMPTargetTeamsDistributeParallelForSimdDirective *
OMPTargetTeamsDistributeParallelForSimdDirective::CreateEmpty(
const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum,
EmptyShell) {
auto Size =
llvm::alignTo(sizeof(OMPTargetTeamsDistributeParallelForSimdDirective),
alignof(OMPClause *));
void *Mem = C.Allocate(
Size + sizeof(OMPClause *) * NumClauses +
sizeof(Stmt *) *
numLoopChildren(CollapsedNum,
OMPD_target_teams_distribute_parallel_for_simd));
return new (Mem) OMPTargetTeamsDistributeParallelForSimdDirective(
CollapsedNum, NumClauses);
}

View File

@@ -1244,6 +1244,12 @@ void StmtPrinter::VisitOMPTargetTeamsDistributeParallelForDirective(
PrintOMPExecutableDirective(Node);
}
void StmtPrinter::VisitOMPTargetTeamsDistributeParallelForSimdDirective(
OMPTargetTeamsDistributeParallelForSimdDirective *Node) {
Indent() << "#pragma omp target teams distribute parallel for simd ";
PrintOMPExecutableDirective(Node);
}
//===----------------------------------------------------------------------===//
// Expr printing methods.
//===----------------------------------------------------------------------===//

View File

@@ -763,6 +763,11 @@ void StmtProfiler::VisitOMPTargetTeamsDistributeParallelForDirective(
VisitOMPLoopDirective(S);
}
void StmtProfiler::VisitOMPTargetTeamsDistributeParallelForSimdDirective(
const OMPTargetTeamsDistributeParallelForSimdDirective *S) {
VisitOMPLoopDirective(S);
}
void StmtProfiler::VisitExpr(const Expr *S) {
VisitStmt(S);
}

View File

@@ -1690,15 +1690,19 @@ CFGBuilder::VisitLogicalOperator(BinaryOperator *B,
// we have been provided.
ExitBlock = RHSBlock = createBlock(false);
// Even though KnownVal is only used in the else branch of the next
// conditional, tryEvaluateBool performs additional checking on the
// Expr, so it should be called unconditionally.
TryResult KnownVal = tryEvaluateBool(RHS);
if (!KnownVal.isKnown())
KnownVal = tryEvaluateBool(B);
if (!Term) {
assert(TrueBlock == FalseBlock);
addSuccessor(RHSBlock, TrueBlock);
}
else {
RHSBlock->setTerminator(Term);
TryResult KnownVal = tryEvaluateBool(RHS);
if (!KnownVal.isKnown())
KnownVal = tryEvaluateBool(B);
addSuccessor(RHSBlock, TrueBlock, !KnownVal.isFalse());
addSuccessor(RHSBlock, FalseBlock, !KnownVal.isTrue());
}

View File

@@ -685,6 +685,16 @@ bool clang::isAllowedClauseForDirective(OpenMPDirectiveKind DKind,
#define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(Name) \
case OMPC_##Name: \
return true;
#include "clang/Basic/OpenMPKinds.def"
default:
break;
}
break;
case OMPD_target_teams_distribute_parallel_for_simd:
switch (CKind) {
#define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(Name) \
case OMPC_##Name: \
return true;
#include "clang/Basic/OpenMPKinds.def"
default:
break;
@@ -721,7 +731,8 @@ bool clang::isOpenMPLoopDirective(OpenMPDirectiveKind DKind) {
DKind == OMPD_teams_distribute_parallel_for_simd ||
DKind == OMPD_teams_distribute_parallel_for ||
DKind == OMPD_target_teams_distribute ||
DKind == OMPD_target_teams_distribute_parallel_for;
DKind == OMPD_target_teams_distribute_parallel_for ||
DKind == OMPD_target_teams_distribute_parallel_for_simd;
}
bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) {
@@ -735,8 +746,8 @@ bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) {
DKind == OMPD_target_parallel_for_simd ||
DKind == OMPD_teams_distribute_parallel_for_simd ||
DKind == OMPD_teams_distribute_parallel_for ||
DKind == OMPD_target_teams_distribute_parallel_for;
// TODO add next directives.
DKind == OMPD_target_teams_distribute_parallel_for ||
DKind == OMPD_target_teams_distribute_parallel_for_simd;
}
bool clang::isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind) {
@@ -752,8 +763,8 @@ bool clang::isOpenMPParallelDirective(OpenMPDirectiveKind DKind) {
DKind == OMPD_target_parallel_for_simd ||
DKind == OMPD_teams_distribute_parallel_for ||
DKind == OMPD_teams_distribute_parallel_for_simd ||
DKind == OMPD_target_teams_distribute_parallel_for;
// TODO add next directives.
DKind == OMPD_target_teams_distribute_parallel_for ||
DKind == OMPD_target_teams_distribute_parallel_for_simd;
}
bool clang::isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind) {
@@ -761,7 +772,8 @@ bool clang::isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind) {
DKind == OMPD_target_parallel_for ||
DKind == OMPD_target_parallel_for_simd || DKind == OMPD_target_simd ||
DKind == OMPD_target_teams || DKind == OMPD_target_teams_distribute ||
DKind == OMPD_target_teams_distribute_parallel_for;
DKind == OMPD_target_teams_distribute_parallel_for ||
DKind == OMPD_target_teams_distribute_parallel_for_simd;
}
bool clang::isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind) {
@@ -779,7 +791,8 @@ bool clang::isOpenMPNestingTeamsDirective(OpenMPDirectiveKind DKind) {
bool clang::isOpenMPTeamsDirective(OpenMPDirectiveKind DKind) {
return isOpenMPNestingTeamsDirective(DKind) ||
DKind == OMPD_target_teams || DKind == OMPD_target_teams_distribute ||
DKind == OMPD_target_teams_distribute_parallel_for;
DKind == OMPD_target_teams_distribute_parallel_for ||
DKind == OMPD_target_teams_distribute_parallel_for_simd;
}
bool clang::isOpenMPSimdDirective(OpenMPDirectiveKind DKind) {
@@ -788,8 +801,8 @@ bool clang::isOpenMPSimdDirective(OpenMPDirectiveKind DKind) {
DKind == OMPD_distribute_parallel_for_simd ||
DKind == OMPD_distribute_simd || DKind == OMPD_target_simd ||
DKind == OMPD_teams_distribute_simd ||
DKind == OMPD_teams_distribute_parallel_for_simd;
// TODO add next directives.
DKind == OMPD_teams_distribute_parallel_for_simd ||
DKind == OMPD_target_teams_distribute_parallel_for_simd;
}
bool clang::isOpenMPNestingDistributeDirective(OpenMPDirectiveKind Kind) {
@@ -805,7 +818,8 @@ bool clang::isOpenMPDistributeDirective(OpenMPDirectiveKind Kind) {
Kind == OMPD_teams_distribute_parallel_for_simd ||
Kind == OMPD_teams_distribute_parallel_for ||
Kind == OMPD_target_teams_distribute ||
Kind == OMPD_target_teams_distribute_parallel_for;
Kind == OMPD_target_teams_distribute_parallel_for ||
Kind == OMPD_target_teams_distribute_parallel_for_simd;
}
bool clang::isOpenMPPrivate(OpenMPClauseKind Kind) {
@@ -830,5 +844,6 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) {
Kind == OMPD_teams_distribute_parallel_for_simd ||
Kind == OMPD_teams_distribute_parallel_for ||
Kind == OMPD_target_teams_distribute ||
Kind == OMPD_target_teams_distribute_parallel_for;
Kind == OMPD_target_teams_distribute_parallel_for ||
Kind == OMPD_target_teams_distribute_parallel_for_simd;
}

View File

@@ -20,53 +20,64 @@
using namespace clang;
using namespace CodeGen;
/// \brief Get the GPU warp size.
llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXWarpSize(CodeGenFunction &CGF) {
namespace {
enum OpenMPRTLFunctionNVPTX {
/// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
/// kmp_int32 thread_limit);
OMPRTL_NVPTX__kmpc_kernel_init,
};
// NVPTX Address space
enum AddressSpace {
AddressSpaceShared = 3,
};
} // namespace
/// Get the GPU warp size.
static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
return Bld.CreateCall(
llvm::Intrinsic::getDeclaration(
&CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
llvm::None, "nvptx_warp_size");
}
/// \brief Get the id of the current thread on the GPU.
llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXThreadID(CodeGenFunction &CGF) {
/// Get the id of the current thread on the GPU.
static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
return Bld.CreateCall(
llvm::Intrinsic::getDeclaration(
&CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
llvm::None, "nvptx_tid");
}
// \brief Get the maximum number of threads in a block of the GPU.
llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXNumThreads(CodeGenFunction &CGF) {
/// Get the maximum number of threads in a block of the GPU.
static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
return Bld.CreateCall(
llvm::Intrinsic::getDeclaration(
&CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
llvm::None, "nvptx_num_threads");
}
/// \brief Get barrier to synchronize all threads in a block.
void CGOpenMPRuntimeNVPTX::getNVPTXCTABarrier(CodeGenFunction &CGF) {
/// Get barrier to synchronize all threads in a block.
static void getNVPTXCTABarrier(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
Bld.CreateCall(llvm::Intrinsic::getDeclaration(
&CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
}
// \brief Synchronize all GPU threads in a block.
void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) {
getNVPTXCTABarrier(CGF);
}
/// Synchronize all GPU threads in a block.
static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); }
/// \brief Get the thread id of the OMP master thread.
/// Get the thread id of the OMP master thread.
/// The master thread id is the first thread (lane) of the last warp in the
/// GPU block. Warp size is assumed to be some power of 2.
/// Thread id is 0 indexed.
/// E.g.: If NumThreads is 33, master id is 32.
/// If NumThreads is 64, master id is 32.
/// If NumThreads is 1024, master id is 992.
llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) {
static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
llvm::Value *NumThreads = getNVPTXNumThreads(CGF);
@@ -77,19 +88,6 @@ llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) {
Bld.CreateNot(Mask), "master_tid");
}
namespace {
enum OpenMPRTLFunctionNVPTX {
/// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
/// kmp_int32 thread_limit);
OMPRTL_NVPTX__kmpc_kernel_init,
};
// NVPTX Address space
enum ADDRESS_SPACE {
ADDRESS_SPACE_SHARED = 3,
};
} // namespace
CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
CodeGenModule &CGM)
: WorkerFn(nullptr), CGFI(nullptr) {
@@ -119,14 +117,14 @@ void CGOpenMPRuntimeNVPTX::initializeEnvironment() {
CGM.getModule(), CGM.Int32Ty, /*isConstant=*/false,
llvm::GlobalValue::CommonLinkage,
llvm::Constant::getNullValue(CGM.Int32Ty), "__omp_num_threads", 0,
llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED);
llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared);
ActiveWorkers->setAlignment(DL.getPrefTypeAlignment(CGM.Int32Ty));
WorkID = new llvm::GlobalVariable(
CGM.getModule(), CGM.Int64Ty, /*isConstant=*/false,
llvm::GlobalValue::CommonLinkage,
llvm::Constant::getNullValue(CGM.Int64Ty), "__tgt_work_id", 0,
llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED);
llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared);
WorkID->setAlignment(DL.getPrefTypeAlignment(CGM.Int64Ty));
}
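The master-thread-id computation above is plain bit arithmetic, master = (NumThreads - 1) & ~(WarpSize - 1); a standalone check against the examples given in the comment:

#include <cassert>

// First lane of the last (possibly partial) warp in the block; WarpSize is
// assumed to be a power of two.
static unsigned masterThreadID(unsigned NumThreads, unsigned WarpSize = 32) {
  return (NumThreads - 1) & ~(WarpSize - 1);
}

int main() {
  assert(masterThreadID(33) == 32);
  assert(masterThreadID(64) == 32);
  assert(masterThreadID(1024) == 992);
  return 0;
}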

View File

@@ -49,38 +49,6 @@ public:
void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
private:
//
// NVPTX calls.
//
/// \brief Get the GPU warp size.
llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF);
/// \brief Get the id of the current thread on the GPU.
llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF);
// \brief Get the maximum number of threads in a block of the GPU.
llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF);
/// \brief Get barrier to synchronize all threads in a block.
void getNVPTXCTABarrier(CodeGenFunction &CGF);
// \brief Synchronize all GPU threads in a block.
void syncCTAThreads(CodeGenFunction &CGF);
//
// OMP calls.
//
/// \brief Get the thread id of the OMP master thread.
/// The master thread id is the first thread (lane) of the last warp in the
/// GPU block. Warp size is assumed to be some power of 2.
/// Thread id is 0 indexed.
/// E.g: If NumThreads is 33, master id is 32.
/// If NumThreads is 64, master id is 32.
/// If NumThreads is 1024, master id is 992.
llvm::Value *getMasterThreadID(CodeGenFunction &CGF);
//
// Private state and methods.
//

View File

@@ -326,6 +326,10 @@ void CodeGenFunction::EmitStmt(const Stmt *S) {
EmitOMPTargetTeamsDistributeParallelForDirective(
cast<OMPTargetTeamsDistributeParallelForDirective>(*S));
break;
case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass:
EmitOMPTargetTeamsDistributeParallelForSimdDirective(
cast<OMPTargetTeamsDistributeParallelForSimdDirective>(*S));
break;
}
}

View File

@@ -2032,6 +2032,16 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
});
}
void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
CGM.getOpenMPRuntime().emitInlinedDirective(
*this, OMPD_target_teams_distribute_parallel_for_simd,
[&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitStmt(
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
});
}
/// \brief Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
const DeclRefExpr *Helper) {
@@ -2760,6 +2770,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
auto &RT = CGM.getOpenMPRuntime();
bool HasLastprivateClause = false;
// Check pre-condition.
{
OMPLoopScope PreInitScope(*this, S);
@@ -2793,6 +2804,16 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
OMPPrivateScope LoopScope(*this);
if (EmitOMPFirstprivateClause(S, LoopScope)) {
// Emit implicit barrier to synchronize threads and avoid data races on
// initialization of firstprivate variables and post-update of
// lastprivate variables.
CGM.getOpenMPRuntime().emitBarrierCall(
*this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
/*ForceSimpleCall=*/true);
}
EmitOMPPrivateClause(S, LoopScope);
HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
EmitOMPPrivateLoopCounters(S, LoopScope);
(void)LoopScope.Privatize();
@@ -2849,6 +2870,13 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
LB.getAddress(), UB.getAddress(), ST.getAddress(),
IL.getAddress(), Chunk);
}
// Emit final copy of the lastprivate variables if IsLastIter != 0.
if (HasLastprivateClause)
EmitOMPLastprivateClauseFinal(
S, /*NoFinals=*/false,
Builder.CreateIsNotNull(
EmitLoadOfScalar(IL, S.getLocStart())));
}
// We're now done with the loop, so jump to the continuation block.
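At the source level, the lastprivate handling wired up here copies the value from the sequentially last iteration back to the original variable, guarded by the IsLastIter flag tested above; a small illustration (any OpenMP 4.5-capable compiler assumed):

#include <cstdio>

int main() {
  int last = -1;
  // Each thread works on a private 'last'; after the construct, the value
  // from the logically last iteration (i == 63) is copied back out.
  #pragma omp target teams distribute lastprivate(last)
  for (int i = 0; i < 64; ++i)
    last = i;
  std::printf("%d\n", last); // prints 63
  return 0;
}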

View File

@@ -1049,6 +1049,19 @@ QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD,
return ResTy;
}
static bool
shouldUseUndefinedBehaviorReturnOptimization(const FunctionDecl *FD,
const ASTContext &Context) {
QualType T = FD->getReturnType();
// Avoid the optimization for functions that return a record type with a
// trivial destructor or another trivially copyable type.
if (const RecordType *RT = T.getCanonicalType()->getAs<RecordType>()) {
if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RT->getDecl()))
return !ClassDecl->hasTrivialDestructor();
}
return !T.isTriviallyCopyableType(Context);
}
void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
const CGFunctionInfo &FnInfo) {
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
@@ -1127,17 +1140,23 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
// function call is used by the caller, the behavior is undefined.
if (getLangOpts().CPlusPlus && !FD->hasImplicitReturnZero() && !SawAsmBlock &&
!FD->getReturnType()->isVoidType() && Builder.GetInsertBlock()) {
bool ShouldEmitUnreachable =
CGM.getCodeGenOpts().StrictReturn ||
shouldUseUndefinedBehaviorReturnOptimization(FD, getContext());
if (SanOpts.has(SanitizerKind::Return)) {
SanitizerScope SanScope(this);
llvm::Value *IsFalse = Builder.getFalse();
EmitCheck(std::make_pair(IsFalse, SanitizerKind::Return),
SanitizerHandler::MissingReturn,
EmitCheckSourceLocation(FD->getLocation()), None);
} else if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
EmitTrapCall(llvm::Intrinsic::trap);
} else if (ShouldEmitUnreachable) {
if (CGM.getCodeGenOpts().OptimizationLevel == 0)
EmitTrapCall(llvm::Intrinsic::trap);
}
if (SanOpts.has(SanitizerKind::Return) || ShouldEmitUnreachable) {
Builder.CreateUnreachable();
Builder.ClearInsertionPoint();
}
Builder.CreateUnreachable();
Builder.ClearInsertionPoint();
}
// Emit the standard function epilogue.
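The behavior made configurable here: flowing off the end of a non-void function is undefined in C++, and by default (-fstrict-return) clang ends such paths with 'unreachable' (a trap at -O0). A sketch of code affected by the new -fno-strict-return escape hatch:

struct Trivial { int x; }; // trivially copyable, so the heuristic above
                           // skips the unreachable under -fno-strict-return

Trivial f(bool b) {
  if (b)
    return {1};
  // Falling off the end here is UB; under the default -fstrict-return this
  // path becomes 'unreachable', so f(false) returns no meaningful value.
}

int main() { return f(true).x; }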

View File

@@ -2699,6 +2699,8 @@ public:
const OMPTargetTeamsDistributeDirective &S);
void EmitOMPTargetTeamsDistributeParallelForDirective(
const OMPTargetTeamsDistributeParallelForDirective &S);
void EmitOMPTargetTeamsDistributeParallelForSimdDirective(
const OMPTargetTeamsDistributeParallelForSimdDirective &S);
/// Emit outlined function for the target directive.
static std::pair<llvm::Function * /*OutlinedFn*/,
@@ -3569,7 +3571,7 @@ public:
// If we still have any arguments, emit them using the type of the argument.
for (auto *A : llvm::make_range(Arg, ArgRange.end()))
ArgTypes.push_back(getVarArgType(A));
ArgTypes.push_back(CallArgTypeInfo ? getVarArgType(A) : A->getType());
EmitCallArgs(Args, ArgTypes, ArgRange, CalleeDecl, ParamsToSkip, Order);
}

View File

@@ -2235,6 +2235,15 @@ static void AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args,
UseSeparateSections)) {
CmdArgs.push_back("-plugin-opt=-data-sections");
}
if (Arg *A = Args.getLastArg(options::OPT_fprofile_sample_use_EQ)) {
StringRef FName = A->getValue();
if (!llvm::sys::fs::exists(FName))
D.Diag(diag::err_drv_no_such_file) << FName;
else
CmdArgs.push_back(
Args.MakeArgString(Twine("-plugin-opt=sample-profile=") + FName));
}
}
/// This is a helper function for validating the optional refinement step
@@ -3058,6 +3067,10 @@ static void CollectArgsForIntegratedAssembler(Compilation &C,
continue;
}
if (C.getDefaultToolChain().getTriple().isOSBinFormatCOFF() &&
Value == "-mbig-obj")
continue; // LLVM handles bigobj automatically
switch (C.getDefaultToolChain().getArch()) {
default:
break;
@@ -4453,6 +4466,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
if (Args.hasFlag(options::OPT_fstrict_enums, options::OPT_fno_strict_enums,
false))
CmdArgs.push_back("-fstrict-enums");
if (!Args.hasFlag(options::OPT_fstrict_return, options::OPT_fno_strict_return,
true))
CmdArgs.push_back("-fno-strict-return");
if (Args.hasFlag(options::OPT_fstrict_vtable_pointers,
options::OPT_fno_strict_vtable_pointers,
false))

View File

@@ -638,6 +638,9 @@ FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
ChromiumStyle.BreakAfterJavaFieldAnnotations = true;
ChromiumStyle.ContinuationIndentWidth = 8;
ChromiumStyle.IndentWidth = 4;
} else if (Language == FormatStyle::LK_JavaScript) {
ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
ChromiumStyle.AllowShortLoopsOnASingleLine = false;
} else {
ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;

View File

@@ -1255,10 +1255,13 @@ void UnwrappedLineParser::tryToParseJSFunction() {
if (FormatTok->is(tok::l_brace))
tryToParseBracedList();
else
while (FormatTok->isNot(tok::l_brace) && !eof())
while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
nextToken();
}
if (FormatTok->is(tok::semi))
return;
parseChildBlock();
}

View File

@@ -370,6 +370,26 @@ void DeclContextPrinter::PrintDeclContext(const DeclContext* DC,
break;
}
case Decl::ClassTemplateSpecialization: {
const auto *CTSD = cast<ClassTemplateSpecializationDecl>(DC);
if (CTSD->isCompleteDefinition())
Out << "[class template specialization] ";
else
Out << "<class template specialization> ";
Out << *CTSD;
break;
}
case Decl::ClassTemplatePartialSpecialization: {
const auto *CTPSD = cast<ClassTemplatePartialSpecializationDecl>(DC);
if (CTPSD->isCompleteDefinition())
Out << "[class template partial specialization] ";
else
Out << "<class template partial specialization> ";
Out << *CTPSD;
break;
}
default:
llvm_unreachable("a decl that inherits DeclContext isn't handled");
}
@@ -400,7 +420,8 @@ void DeclContextPrinter::PrintDeclContext(const DeclContext* DC,
case Decl::CXXConstructor:
case Decl::CXXDestructor:
case Decl::CXXConversion:
{
case Decl::ClassTemplateSpecialization:
case Decl::ClassTemplatePartialSpecialization: {
DeclContext* DC = cast<DeclContext>(I);
PrintDeclContext(DC, Indentation+2);
break;
@@ -478,6 +499,37 @@ void DeclContextPrinter::PrintDeclContext(const DeclContext* DC,
Out << "<omp threadprivate> " << '"' << I << "\"\n";
break;
}
case Decl::Friend: {
Out << "<friend>";
if (const NamedDecl *ND = cast<FriendDecl>(I)->getFriendDecl())
Out << ' ' << *ND;
Out << "\n";
break;
}
case Decl::Using: {
Out << "<using> " << *cast<UsingDecl>(I) << "\n";
break;
}
case Decl::UsingShadow: {
Out << "<using shadow> " << *cast<UsingShadowDecl>(I) << "\n";
break;
}
case Decl::Empty: {
Out << "<empty>\n";
break;
}
case Decl::AccessSpec: {
Out << "<access specifier>\n";
break;
}
case Decl::VarTemplate: {
Out << "<var template> " << *cast<VarTemplateDecl>(I) << "\n";
break;
}
case Decl::StaticAssert: {
Out << "<static assert>\n";
break;
}
default:
Out << "DeclKind: " << DK << '"' << I << "\"\n";
llvm_unreachable("decl unhandled");

View File

@@ -602,6 +602,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
Opts.NoDwarfDirectoryAsm = Args.hasArg(OPT_fno_dwarf_directory_asm);
Opts.SoftFloat = Args.hasArg(OPT_msoft_float);
Opts.StrictEnums = Args.hasArg(OPT_fstrict_enums);
Opts.StrictReturn = !Args.hasArg(OPT_fno_strict_return);
Opts.StrictVTablePointers = Args.hasArg(OPT_fstrict_vtable_pointers);
Opts.UnsafeFPMath = Args.hasArg(OPT_menable_unsafe_fp_math) ||
Args.hasArg(OPT_cl_unsafe_math_optimizations) ||

Some files were not shown because too many files have changed in this diff.