Merge llvm, clang, lld and lldb trunk r291012, and resolve conflicts.
commit 8e0f8b8c96

@@ -21,8 +21,8 @@
// class MyClass : public RefCountedBase<MyClass> {};
//
// void foo() {
// // Objects that inherit from RefCountedBase should always be instantiated
// // on the heap, never on the stack.
// // Constructing an IntrusiveRefCntPtr increases the pointee's refcount by
// // 1 (from 0 in this case).
// IntrusiveRefCntPtr<MyClass> Ptr1(new MyClass());
//
// // Copying an IntrusiveRefCntPtr increases the pointee's refcount by 1.

@@ -68,9 +68,6 @@ namespace llvm {
/// calls to Release() and Retain(), which increment and decrement the object's
/// refcount, respectively. When a Release() call decrements the refcount to 0,
/// the object deletes itself.
///
/// Objects that inherit from RefCountedBase should always be allocated with
/// operator new.
template <class Derived> class RefCountedBase {
mutable unsigned RefCount = 0;

@@ -18,6 +18,7 @@

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Compiler.h"
#include <algorithm>

@@ -107,6 +108,39 @@ public:
return false;
}

/// Insert a sequence of new elements into the PriorityWorklist.
template <typename SequenceT>
typename std::enable_if<!std::is_convertible<SequenceT, T>::value>::type
insert(SequenceT &&Input) {
if (std::begin(Input) == std::end(Input))
// Nothing to do for an empty input sequence.
return;

// First pull the input sequence into the vector as a bulk append
// operation.
ptrdiff_t StartIndex = V.size();
V.insert(V.end(), std::begin(Input), std::end(Input));
// Now walk backwards fixing up the index map and deleting any duplicates.
for (ptrdiff_t i = V.size() - 1; i >= StartIndex; --i) {
auto InsertResult = M.insert({V[i], i});
if (InsertResult.second)
continue;

// If the existing index is before this insert's start, nuke that one and
// move it up.
ptrdiff_t &Index = InsertResult.first->second;
if (Index < StartIndex) {
V[Index] = T();
Index = i;
continue;
}

// Otherwise the existing one comes first so just clear out the value in
// this slot.
V[i] = T();
}
}

/// Remove the last element of the PriorityWorklist.
void pop_back() {
assert(!empty() && "Cannot remove an element when empty!");
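
A minimal usage sketch of the bulk insert added above; the worklist type, values, and processing loop are illustrative only:

#include "llvm/ADT/PriorityWorklist.h"
#include <vector>

void processAll() {
  llvm::PriorityWorklist<int> WL;
  std::vector<int> Seq = {1, 2, 3, 2};
  // Bulk append, then dedupe backwards: the later occurrence of 2 keeps
  // the higher-priority slot, so the pop order is 2, 3, 1.
  WL.insert(Seq);
  while (!WL.empty()) {
    int Item = WL.back();
    WL.pop_back();
    (void)Item; // process Item here
  }
}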

@@ -169,6 +203,11 @@ public:
return true;
}

/// Reverse the items in the PriorityWorklist.
///
/// This does an in-place reversal. Other kinds of reverse aren't easy to
/// support in the face of the worklist semantics.

/// Completely clear the PriorityWorklist
void clear() {
M.clear();

@@ -23,10 +23,9 @@ namespace llvm {
class DataLayout;
class MDNode;

/// isDereferenceablePointer - Return true if this is always a dereferenceable
/// pointer. If the context instruction is specified perform context-sensitive
/// analysis and return true if the pointer is dereferenceable at the
/// specified instruction.
/// Return true if this is always a dereferenceable pointer. If the context
/// instruction is specified perform context-sensitive analysis and return true
/// if the pointer is dereferenceable at the specified instruction.
bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr);

@@ -40,8 +39,7 @@ bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr);

/// isSafeToLoadUnconditionally - Return true if we know that executing a load
/// from this value cannot trap.
/// Return true if we know that executing a load from this value cannot trap.
///
/// If DT and ScanFrom are specified this method performs context-sensitive
/// analysis and returns true if it is safe to load immediately before ScanFrom.

@@ -54,12 +52,12 @@ bool isSafeToLoadUnconditionally(Value *V, unsigned Align,
Instruction *ScanFrom = nullptr,
const DominatorTree *DT = nullptr);

/// DefMaxInstsToScan - the default number of maximum instructions
/// to scan in the block, used by FindAvailableLoadedValue().
/// The default number of maximum instructions to scan in the block, used by
/// FindAvailableLoadedValue().
extern cl::opt<unsigned> DefMaxInstsToScan;

/// \brief Scan backwards to see if we have the value of the given load
/// available locally within a small number of instructions.
/// Scan backwards to see if we have the value of the given load available
/// locally within a small number of instructions.
///
/// You can use this function to scan across multiple blocks: after you call
/// this function, if ScanFrom points at the beginning of the block, it's safe

@@ -208,6 +208,8 @@ public:
SledKind Kind;
bool AlwaysInstrument;
const class Function *Fn;

void emit(int, MCStreamer *, const MCSymbol *) const;
};

// All the sleds to be emitted.

@@ -216,6 +218,9 @@ public:
// Helper function to record a given XRay sled.
void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind);

/// Emit a table with all XRay instrumentation points.
void emitXRayTable();

//===------------------------------------------------------------------===//
// MachineFunctionPass Implementation.
//===------------------------------------------------------------------===//

@@ -59,6 +59,9 @@ class MachineDominatorTree : public MachineFunctionPass {
/// such as BB == elt.NewBB.
mutable SmallSet<MachineBasicBlock *, 32> NewBBs;

/// The DominatorTreeBase that is used to compute a normal dominator tree
DominatorTreeBase<MachineBasicBlock>* DT;

/// \brief Apply all the recorded critical edges to the DT.
/// This updates the underlying DT information in a way that uses
/// the fast query path of DT as much as possible.

@@ -68,7 +71,6 @@ class MachineDominatorTree : public MachineFunctionPass {

public:
static char ID; // Pass ID, replacement for typeid
DominatorTreeBase<MachineBasicBlock>* DT;

MachineDominatorTree();

@@ -116,12 +116,12 @@ public:
// An unsigned integer indicating the identity of the source file
// corresponding to a machine instruction.
uint16_t File;
// An unsigned integer whose value encodes the applicable instruction set
// architecture for the current instruction.
uint8_t Isa;
// An unsigned integer representing the DWARF path discriminator value
// for this location.
uint32_t Discriminator;
// An unsigned integer whose value encodes the applicable instruction set
// architecture for the current instruction.
uint8_t Isa;
// A boolean indicating that the current instruction is the beginning of a
// statement.
uint8_t IsStmt:1,

@@ -104,6 +104,13 @@ def int_amdgcn_dispatch_id :
// Instruction Intrinsics
//===----------------------------------------------------------------------===//

// The first parameter is s_sendmsg immediate (i16),
// the second one is copied to m0
def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;

def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
Intrinsic<[], [], [IntrConvergent]>;

@@ -2063,130 +2063,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
}

// Vector extract and insert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_mask_vextractf32x4_512 :
GCCBuiltin<"__builtin_ia32_extractf32x4_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti32x4_512 :
GCCBuiltin<"__builtin_ia32_extracti32x4_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i32_ty, llvm_i32_ty,
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextractf32x4_256 :
GCCBuiltin<"__builtin_ia32_extractf32x4_256_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v8f32_ty, llvm_i32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti32x4_256 :
GCCBuiltin<"__builtin_ia32_extracti32x4_256_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i32_ty, llvm_i32_ty,
llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextractf64x2_256 :
GCCBuiltin<"__builtin_ia32_extractf64x2_256_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v4f64_ty, llvm_i32_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti64x2_256 :
GCCBuiltin<"__builtin_ia32_extracti64x2_256_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty, llvm_i32_ty,
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextractf64x2_512 :
GCCBuiltin<"__builtin_ia32_extractf64x2_512_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v8f64_ty, llvm_i32_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti64x2_512 :
GCCBuiltin<"__builtin_ia32_extracti64x2_512_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v8i64_ty, llvm_i32_ty,
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextractf32x8_512 :
GCCBuiltin<"__builtin_ia32_extractf32x8_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v16f32_ty, llvm_i32_ty,
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti32x8_512 :
GCCBuiltin<"__builtin_ia32_extracti32x8_mask">,
Intrinsic<[llvm_v8i32_ty],[llvm_v16i32_ty, llvm_i32_ty,
llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextractf64x4_512 :
GCCBuiltin<"__builtin_ia32_extractf64x4_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v8f64_ty, llvm_i32_ty,
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vextracti64x4_512 :
GCCBuiltin<"__builtin_ia32_extracti64x4_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v8i64_ty, llvm_i32_ty,
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;

def int_x86_avx512_mask_insertf32x4_256 :
GCCBuiltin<"__builtin_ia32_insertf32x4_256_mask">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty],
[IntrNoMem]>;

def int_x86_avx512_mask_insertf32x4_512 :
GCCBuiltin<"__builtin_ia32_insertf32x4_mask">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty],
[IntrNoMem]>;

def int_x86_avx512_mask_insertf32x8_512 :
GCCBuiltin<"__builtin_ia32_insertf32x8_mask">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty],
[IntrNoMem]>;

def int_x86_avx512_mask_insertf64x2_256 :
GCCBuiltin<"__builtin_ia32_insertf64x2_256_mask">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty],
[IntrNoMem]>;

def int_x86_avx512_mask_insertf64x2_512 :
GCCBuiltin<"__builtin_ia32_insertf64x2_512_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty],
[IntrNoMem]>;

def int_x86_avx512_mask_insertf64x4_512 :
GCCBuiltin<"__builtin_ia32_insertf64x4_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty],
[IntrNoMem]>;

def int_x86_avx512_mask_inserti32x4_256 :
GCCBuiltin<"__builtin_ia32_inserti32x4_256_mask">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrNoMem]>;

def int_x86_avx512_mask_inserti32x4_512 :
GCCBuiltin<"__builtin_ia32_inserti32x4_mask">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty],
[IntrNoMem]>;

def int_x86_avx512_mask_inserti32x8_512 :
GCCBuiltin<"__builtin_ia32_inserti32x8_mask">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty],
[IntrNoMem]>;

def int_x86_avx512_mask_inserti64x2_256 :
GCCBuiltin<"__builtin_ia32_inserti64x2_256_mask">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem]>;

def int_x86_avx512_mask_inserti64x2_512 :
GCCBuiltin<"__builtin_ia32_inserti64x2_512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrNoMem]>;

def int_x86_avx512_mask_inserti64x4_512 :
GCCBuiltin<"__builtin_ia32_inserti64x4_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrNoMem]>;
}

// Conditional load ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_maskload_d : GCCBuiltin<"__builtin_ia32_maskloadd">,

@@ -769,17 +769,13 @@ namespace detail {
std::error_code directory_iterator_increment(DirIterState &);
std::error_code directory_iterator_destruct(DirIterState &);

/// DirIterState - Keeps state for the directory_iterator. It is reference
/// counted in order to preserve InputIterator semantics on copy.
struct DirIterState : public RefCountedBase<DirIterState> {
DirIterState()
: IterationHandle(0) {}

/// Keeps state for the directory_iterator.
struct DirIterState {
~DirIterState() {
directory_iterator_destruct(*this);
}

intptr_t IterationHandle;
intptr_t IterationHandle = 0;
directory_entry CurrentEntry;
};
} // end namespace detail

@@ -788,23 +784,23 @@ namespace detail {
/// operator++ because we need an error_code. If it's really needed we can make
/// it call report_fatal_error on error.
class directory_iterator {
IntrusiveRefCntPtr<detail::DirIterState> State;
std::shared_ptr<detail::DirIterState> State;

public:
explicit directory_iterator(const Twine &path, std::error_code &ec) {
State = new detail::DirIterState;
State = std::make_shared<detail::DirIterState>();
SmallString<128> path_storage;
ec = detail::directory_iterator_construct(*State,
path.toStringRef(path_storage));
}

explicit directory_iterator(const directory_entry &de, std::error_code &ec) {
State = new detail::DirIterState;
State = std::make_shared<detail::DirIterState>();
ec = detail::directory_iterator_construct(*State, de.path());
}

/// Construct end iterator.
directory_iterator() : State(nullptr) {}
directory_iterator() = default;

// No operator++ because we need error_code.
directory_iterator &increment(std::error_code &ec) {
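
Since the iterator exposes increment(ec) instead of operator++, the usual traversal loop looks roughly like this; a sketch where the path argument and the use of llvm::outs() are illustrative:

#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"

void listDirectory(const llvm::Twine &Path) {
  namespace fs = llvm::sys::fs;
  std::error_code EC;
  // A default-constructed directory_iterator is the end iterator.
  for (fs::directory_iterator I(Path, EC), E; I != E && !EC; I.increment(EC))
    llvm::outs() << I->path() << "\n";
}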

@@ -209,6 +209,15 @@ struct DocumentListTraits {
// static T::value_type& element(IO &io, T &seq, size_t index);
};

/// This class should be specialized by any type that needs to be converted
/// to/from a YAML mapping in the case where the names of the keys are not known
/// in advance, e.g. a string map.
template <typename T>
struct CustomMappingTraits {
// static void inputOne(IO &io, StringRef key, T &elem);
// static void output(IO &io, T &elem);
};

// Only used for better diagnostics of missing traits
template <typename T>
struct MissingTrait;

@@ -358,6 +367,23 @@ public:
static bool const value = (sizeof(test<SequenceTraits<T>>(nullptr)) == 1);
};

// Test if CustomMappingTraits<T> is defined on type T.
template <class T>
struct has_CustomMappingTraits
{
typedef void (*Signature_input)(IO &io, StringRef key, T &v);

template <typename U>
static char test(SameType<Signature_input, &U::inputOne>*);

template <typename U>
static double test(...);

public:
static bool const value =
(sizeof(test<CustomMappingTraits<T>>(nullptr)) == 1);
};

// has_FlowTraits<int> will cause an error with some compilers because
// it subclasses int. Using this wrapper only instantiates the
// real has_FlowTraits only if the template type is a class.
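
The detector above is the usual sizeof-based SFINAE idiom shared by the other has_* traits in this header. A freestanding sketch of the same idiom, with hypothetical type names, assuming the SameType helper declared earlier in the file:

template <typename T, T> struct SameType; // as in YAMLTraits.h

struct WithHook { static void inputOne(int &); };
struct WithoutHook {};

template <class T> struct HasInputOne {
  template <typename U>
  static char test(SameType<void (*)(int &), &U::inputOne> *); // preferred
  template <typename U>
  static double test(...); // fallback when &U::inputOne doesn't match
  static const bool value = sizeof(test<T>(nullptr)) == sizeof(char);
};

static_assert(HasInputOne<WithHook>::value, "hook detected");
static_assert(!HasInputOne<WithoutHook>::value, "no hook detected");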

@@ -493,6 +519,7 @@ struct missingTraits
!has_BlockScalarTraits<T>::value &&
!has_MappingTraits<T, Context>::value &&
!has_SequenceTraits<T>::value &&
!has_CustomMappingTraits<T>::value &&
!has_DocumentListTraits<T>::value> {};

template <typename T, typename Context>

@@ -531,6 +558,7 @@ public:
virtual void endMapping() = 0;
virtual bool preflightKey(const char*, bool, bool, bool &, void *&) = 0;
virtual void postflightKey(void*) = 0;
virtual std::vector<StringRef> keys() = 0;

virtual void beginFlowMapping() = 0;
virtual void endFlowMapping() = 0;

@@ -818,6 +846,21 @@ yamlize(IO &io, T &Val, bool, Context &Ctx) {
}
}

template <typename T>
typename std::enable_if<has_CustomMappingTraits<T>::value, void>::type
yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
if ( io.outputting() ) {
io.beginMapping();
CustomMappingTraits<T>::output(io, Val);
io.endMapping();
} else {
io.beginMapping();
for (StringRef key : io.keys())
CustomMappingTraits<T>::inputOne(io, key, Val);
io.endMapping();
}
}

template <typename T>
typename std::enable_if<missingTraits<T, EmptyContext>::value, void>::type
yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {

@@ -1074,6 +1117,7 @@ private:
void endMapping() override;
bool preflightKey(const char *, bool, bool, bool &, void *&) override;
void postflightKey(void *) override;
std::vector<StringRef> keys() override;
void beginFlowMapping() override;
void endFlowMapping() override;
unsigned beginSequence() override;

@@ -1154,10 +1198,8 @@ private:

typedef llvm::StringMap<std::unique_ptr<HNode>> NameToNode;

bool isValidKey(StringRef key);

NameToNode Mapping;
llvm::SmallVector<const char*, 6> ValidKeys;
llvm::SmallVector<std::string, 6> ValidKeys;
};

class SequenceHNode : public HNode {

@@ -1215,6 +1257,7 @@ public:
void endMapping() override;
bool preflightKey(const char *key, bool, bool, bool &, void *&) override;
void postflightKey(void *) override;
std::vector<StringRef> keys() override;
void beginFlowMapping() override;
void endFlowMapping() override;
unsigned beginSequence() override;

@@ -1384,6 +1427,17 @@ operator>>(Input &In, T &Val) {
return In;
}

// Define non-member operator>> so that Input can stream in a string map.
template <typename T>
inline
typename std::enable_if<has_CustomMappingTraits<T>::value, Input &>::type
operator>>(Input &In, T &Val) {
EmptyContext Ctx;
if (In.setCurrentDocument())
yamlize(In, Val, true, Ctx);
return In;
}

// Provide better error message about types missing a trait specialization
template <typename T>
inline typename std::enable_if<missingTraits<T, EmptyContext>::value,

@@ -1457,6 +1511,21 @@ operator<<(Output &Out, T &Val) {
return Out;
}

// Define non-member operator<< so that Output can stream out a string map.
template <typename T>
inline
typename std::enable_if<has_CustomMappingTraits<T>::value, Output &>::type
operator<<(Output &Out, T &Val) {
EmptyContext Ctx;
Out.beginDocuments();
if (Out.preflightDocument(0)) {
yamlize(Out, Val, true, Ctx);
Out.postflightDocument();
}
Out.endDocuments();
return Out;
}

// Provide better error message about types missing a trait specialization
template <typename T>
inline typename std::enable_if<missingTraits<T, EmptyContext>::value,

@@ -1476,6 +1545,18 @@ template <typename T> struct SequenceTraitsImpl {
}
};

/// Implementation of CustomMappingTraits for std::map<std::string, T>.
template <typename T> struct StdMapStringCustomMappingTraitsImpl {
typedef std::map<std::string, T> map_type;
static void inputOne(IO &io, StringRef key, map_type &v) {
io.mapRequired(key.str().c_str(), v[key]);
}
static void output(IO &io, map_type &v) {
for (auto &p : v)
io.mapRequired(p.first.c_str(), p.second);
}
};

} // end namespace yaml
} // end namespace llvm

@@ -1530,4 +1611,15 @@ template <typename T> struct SequenceTraitsImpl {
} \
}

/// Utility for declaring that std::map<std::string, _type> should be considered
/// a YAML map.
#define LLVM_YAML_IS_STRING_MAP(_type) \
namespace llvm { \
namespace yaml { \
template <> \
struct CustomMappingTraits<std::map<std::string, _type>> \
: public StdMapStringCustomMappingTraitsImpl<_type> {}; \
} \
}

#endif // LLVM_SUPPORT_YAMLTRAITS_H
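
A minimal round-trip sketch of the new macro; the YAML text and the driver function are hypothetical:

#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <string>

LLVM_YAML_IS_STRING_MAP(int)

void roundTrip() {
  std::map<std::string, int> Counts;
  llvm::yaml::Input Yin("foo: 1\nbar: 2\n");
  Yin >> Counts; // keys are not known in advance; inputOne() handles each
  llvm::yaml::Output Yout(llvm::errs());
  Yout << Counts; // output() walks the map
}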

@@ -2542,9 +2542,6 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
return !CFP->getValueAPF().isNegZero();

// FIXME: Magic number! At the least, this should be given a name because it's
// used similarly in CannotBeOrderedLessThanZero(). A better fix may be to
// expose it as a parameter, so it can be used for testing / experimenting.
if (Depth == MaxDepth)
return false; // Limit search depth.

@@ -2589,9 +2586,6 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V,
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero();

// FIXME: Magic number! At the least, this should be given a name because it's
// used similarly in CannotBeNegativeZero(). A better fix may be to
// expose it as a parameter, so it can be used for testing / experimenting.
if (Depth == MaxDepth)
return false; // Limit search depth.

@@ -749,7 +749,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
// handles the case where this is type ODRed with a definition needed
// by the importing module, in which case the existing definition is
// used.
if (IsImporting && !ImportFullTypeDefinitions &&
if (IsImporting && !ImportFullTypeDefinitions && Identifier &&
(Tag == dwarf::DW_TAG_enumeration_type ||
Tag == dwarf::DW_TAG_class_type ||
Tag == dwarf::DW_TAG_structure_type ||

@@ -272,28 +272,10 @@ static const Value *getNoopInput(const Value *V,
TLI.allowTruncateForTailCall(Op->getType(), I->getType())) {
DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits());
NoopInput = Op;
} else if (isa<CallInst>(I)) {
// Look through call (skipping callee)
for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 1;
i != e; ++i) {
unsigned attrInd = i - I->op_begin() + 1;
if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
NoopInput = *i;
break;
}
}
} else if (isa<InvokeInst>(I)) {
// Look through invoke (skipping BB, BB, Callee)
for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 3;
i != e; ++i) {
unsigned attrInd = i - I->op_begin() + 1;
if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
NoopInput = *i;
break;
}
}
} else if (auto CS = ImmutableCallSite(I)) {
const Value *ReturnedOp = CS.getReturnedArgOperand();
if (ReturnedOp && isNoopBitcast(ReturnedOp->getType(), I->getType(), TLI))
NoopInput = ReturnedOp;
} else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) {
// Value may come from either the aggregate or the scalar
ArrayRef<unsigned> InsertLoc = IVI->getIndices();

@@ -37,6 +37,8 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"

@@ -2610,6 +2612,61 @@ AsmPrinterHandler::~AsmPrinterHandler() {}

void AsmPrinterHandler::markFunctionEnd() {}

// In the binary's "xray_instr_map" section, an array of these function entries
// describes each instrumentation point. When XRay patches your code, the index
// into this table will be given to your handler as a patch point identifier.
void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out,
const MCSymbol *CurrentFnSym) const {
Out->EmitSymbolValue(Sled, Bytes);
Out->EmitSymbolValue(CurrentFnSym, Bytes);
auto Kind8 = static_cast<uint8_t>(Kind);
Out->EmitBytes(StringRef(reinterpret_cast<const char *>(&Kind8), 1));
Out->EmitBytes(
StringRef(reinterpret_cast<const char *>(&AlwaysInstrument), 1));
Out->EmitZeros(2 * Bytes - 2); // Pad the previous two entries
}

void AsmPrinter::emitXRayTable() {
if (Sleds.empty())
return;

auto PrevSection = OutStreamer->getCurrentSectionOnly();
auto Fn = MF->getFunction();
MCSection *Section = nullptr;
if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) {
if (Fn->hasComdat()) {
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
Fn->getComdat()->getName());
} else {
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC);
}
} else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) {
Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
SectionKind::getReadOnlyWithRel());
} else {
llvm_unreachable("Unsupported target");
}

// Before we switch over, we force a reference to a label inside the
// xray_instr_map section. Since this function is always called just
// before the function's end, we assume that this is happening after
// the last return instruction.

auto WordSizeBytes = TM.getPointerSize();
MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true);
OutStreamer->EmitCodeAlignment(16);
OutStreamer->EmitSymbolValue(Tmp, WordSizeBytes, false);
OutStreamer->SwitchSection(Section);
OutStreamer->EmitLabel(Tmp);
for (const auto &Sled : Sleds)
Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym);

OutStreamer->SwitchSection(PrevSection);
Sleds.clear();
}

void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
SledKind Kind) {
auto Fn = MI.getParent()->getParent()->getFunction();
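
Each sled record emitted above is two pointer-sized words plus a kind byte, an always-instrument byte, and zero padding out to two more words. A hypothetical C++ mirror of the 64-bit layout (not an actual LLVM header) is:

#include <cstdint>

struct XRaySledEntry64 {
  uint64_t SledAddress;     // EmitSymbolValue(Sled, 8)
  uint64_t FunctionAddress; // EmitSymbolValue(CurrentFnSym, 8)
  uint8_t Kind;             // one byte from SledKind
  uint8_t AlwaysInstrument; // one byte
  uint8_t Padding[14];      // EmitZeros(2 * 8 - 2)
};
static_assert(sizeof(XRaySledEntry64) == 32, "four 8-byte words per entry");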

@@ -1124,7 +1124,7 @@ void HoistSpillHelper::rmRedundantSpills(
// earlier spill with smaller SlotIndex.
for (const auto CurrentSpill : Spills) {
MachineBasicBlock *Block = CurrentSpill->getParent();
MachineDomTreeNode *Node = MDT.DT->getNode(Block);
MachineDomTreeNode *Node = MDT.getBase().getNode(Block);
MachineInstr *PrevSpill = SpillBBToSpill[Node];
if (PrevSpill) {
SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill);

@@ -1132,9 +1132,9 @@ void HoistSpillHelper::rmRedundantSpills(
MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill;
MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill;
SpillsToRm.push_back(SpillToRm);
SpillBBToSpill[MDT.DT->getNode(Block)] = SpillToKeep;
SpillBBToSpill[MDT.getBase().getNode(Block)] = SpillToKeep;
} else {
SpillBBToSpill[MDT.DT->getNode(Block)] = CurrentSpill;
SpillBBToSpill[MDT.getBase().getNode(Block)] = CurrentSpill;
}
}
for (const auto SpillToRm : SpillsToRm)

@@ -1209,7 +1209,7 @@ void HoistSpillHelper::getVisitOrders(
// Sort the nodes in WorkSet in top-down order and save the nodes
// in Orders. Orders will be used for hoisting in runHoistSpills.
unsigned idx = 0;
Orders.push_back(MDT.DT->getNode(Root));
Orders.push_back(MDT.getBase().getNode(Root));
do {
MachineDomTreeNode *Node = Orders[idx++];
const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();

@@ -4277,7 +4277,8 @@ struct BaseIndexOffset {
}

/// Parses tree in Ptr for base, index, offset addresses.
static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) {
static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG,
int64_t PartialOffset = 0) {
bool IsIndexSignExt = false;

// Split up a folded GlobalAddress+Offset into its component parts.

@@ -4286,7 +4287,7 @@ struct BaseIndexOffset {
return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
SDLoc(GA),
GA->getValueType(0),
/*Offset=*/0,
/*Offset=*/PartialOffset,
/*isTargetGA=*/false,
GA->getTargetFlags()),
SDValue(),

@@ -4298,14 +4299,13 @@ struct BaseIndexOffset {
// instruction, then it could be just the BASE or everything else we don't
// know how to handle. Just use Ptr as BASE and give up.
if (Ptr->getOpcode() != ISD::ADD)
return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);

// We know that we have at least an ADD instruction. Try to pattern match
// the simple case of BASE + OFFSET.
if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
IsIndexSignExt);
return match(Ptr->getOperand(0), DAG, Offset + PartialOffset);
}

// Inside a loop the current BASE pointer is calculated using an ADD and a

@@ -4314,7 +4314,7 @@ struct BaseIndexOffset {
// (i64 mul (i64 %induction_var)
// (i64 %element_size)))
if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);

// Look at Base + Index + Offset cases.
SDValue Base = Ptr->getOperand(0);

@@ -4328,14 +4328,14 @@ struct BaseIndexOffset {

// Either the case of Base + Index (no offset) or something else.
if (IndexOffset->getOpcode() != ISD::ADD)
return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt);

// Now we have the case of Base + Index + offset.
SDValue Index = IndexOffset->getOperand(0);
SDValue Offset = IndexOffset->getOperand(1);

if (!isa<ConstantSDNode>(Offset))
return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);

// Ignore signextends.
if (Index->getOpcode() == ISD::SIGN_EXTEND) {

@@ -4344,7 +4344,7 @@ struct BaseIndexOffset {
} else IsIndexSignExt = false;

int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt);
}
};
} // namespace
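
Threading PartialOffset through the recursion lets nested constant adds collapse into a single offset. An illustrative trace with hypothetical SelectionDAG values:

// Ptr = (add (add %base, 16), 8)
// match(Ptr, DAG)                 -> sees constant 8, recurses with 8
// match((add %base, 16), DAG, 8)  -> sees constant 16, recurses with 24
// match(%base, DAG, 24)           -> not an ADD, so the result is
//                                    BaseIndexOffset(%base, SDValue(), 24, false)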

@@ -88,15 +88,15 @@ void OProfileJITEventListener::NotifyObjectEmitted(
// Use symbol info to iterate functions in the object.
for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) {
SymbolRef Sym = P.first;
if (Sym.getType() != SymbolRef::ST_Function)
if (!Sym.getType() || *Sym.getType() != SymbolRef::ST_Function)
continue;

ErrorOr<StringRef> NameOrErr = Sym.getName();
if (NameOrErr.getError())
Expected<StringRef> NameOrErr = Sym.getName();
if (!NameOrErr)
continue;
StringRef Name = *NameOrErr;
ErrorOr<uint64_t> AddrOrErr = Sym.getAddress();
if (AddrOrErr.getError())
Expected<uint64_t> AddrOrErr = Sym.getAddress();
if (!AddrOrErr)
continue;
uint64_t Addr = *AddrOrErr;
uint64_t Size = P.second;

@@ -128,9 +128,9 @@ void OProfileJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
for (symbol_iterator I = DebugObj.symbol_begin(),
E = DebugObj.symbol_end();
I != E; ++I) {
if (I->getType() == SymbolRef::ST_Function) {
ErrorOr<uint64_t> AddrOrErr = I->getAddress();
if (AddrOrErr.getError())
if (I->getType() && *I->getType() == SymbolRef::ST_Function) {
Expected<uint64_t> AddrOrErr = I->getAddress();
if (!AddrOrErr)
continue;
uint64_t Addr = *AddrOrErr;
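
This hunk swaps ErrorOr (checked via getError()) for Expected (checked via its boolean conversion). A generic sketch of the pattern with a hypothetical callee; note that a failed Expected should have its error consumed:

#include "llvm/Support/Error.h"

llvm::Expected<int> mayFail() { return 42; } // hypothetical

void use() {
  llvm::Expected<int> ValOrErr = mayFail();
  if (!ValOrErr) {
    // An unchecked Expected error aborts in debug builds; consume it.
    llvm::consumeError(ValOrErr.takeError());
    return;
  }
  int Val = *ValOrErr;
  (void)Val;
}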

@@ -342,8 +342,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
Name.startswith("avx.vinsertf128.") || // Added in 3.7
Name == "avx2.vinserti128" || // Added in 3.7
Name.startswith("avx512.mask.insert") || // Added in 4.0
Name.startswith("avx.vextractf128.") || // Added in 3.7
Name == "avx2.vextracti128" || // Added in 3.7
Name.startswith("avx512.mask.vextract") || // Added in 4.0
Name.startswith("sse4a.movnt.") || // Added in 3.9
Name.startswith("avx.movnt.") || // Added in 3.2
Name.startswith("avx512.storent.") || // Added in 3.9

@@ -1150,21 +1152,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {

Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
} else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
Name == "avx2.vinserti128")) {
Name == "avx2.vinserti128" ||
Name.startswith("avx512.mask.insert"))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
VectorType *VecTy = cast<VectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
unsigned DstNumElts = CI->getType()->getVectorNumElements();
unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
unsigned Scale = DstNumElts / SrcNumElts;

// Mask off the high bits of the immediate value; hardware ignores those.
Imm = Imm & 1;
Imm = Imm % Scale;

// Extend the second operand into a vector that is twice as big.
// Extend the second operand into a vector the size of the destination.
Value *UndefV = UndefValue::get(Op1->getType());
SmallVector<uint32_t, 8> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i)
SmallVector<uint32_t, 8> Idxs(DstNumElts);
for (unsigned i = 0; i != SrcNumElts; ++i)
Idxs[i] = i;
for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
Idxs[i] = SrcNumElts;
Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);

// Insert the second operand into the first operand.

@@ -1178,33 +1184,41 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
// Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >

// The low half of the result is either the low half of the 1st operand
// or the low half of the 2nd operand (the inserted vector).
for (unsigned i = 0; i != NumElts / 2; ++i)
Idxs[i] = Imm ? i : (i + NumElts);
// The high half of the result is either the low half of the 2nd operand
// (the inserted vector) or the high half of the 1st operand.
for (unsigned i = NumElts / 2; i != NumElts; ++i)
Idxs[i] = Imm ? (i + NumElts / 2) : i;
// First fill with identify mask.
for (unsigned i = 0; i != DstNumElts; ++i)
Idxs[i] = i;
// Then replace the elements where we need to insert.
for (unsigned i = 0; i != SrcNumElts; ++i)
Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);

// If the intrinsic has a mask operand, handle that.
if (CI->getNumArgOperands() == 5)
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
Name == "avx2.vextracti128")) {
Name == "avx2.vextracti128" ||
Name.startswith("avx512.mask.vextract"))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
VectorType *VecTy = cast<VectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
unsigned DstNumElts = CI->getType()->getVectorNumElements();
unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
unsigned Scale = SrcNumElts / DstNumElts;

// Mask off the high bits of the immediate value; hardware ignores those.
Imm = Imm & 1;
Imm = Imm % Scale;

// Get indexes for either the high half or low half of the input vector.
SmallVector<uint32_t, 4> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
Idxs[i] = Imm ? (i + NumElts) : i;
// Get indexes for the subvector of the input vector.
SmallVector<uint32_t, 8> Idxs(DstNumElts);
for (unsigned i = 0; i != DstNumElts; ++i) {
Idxs[i] = i + (Imm * DstNumElts);
}
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

Value *UndefV = UndefValue::get(Op0->getType());
Rep = Builder.CreateShuffleVector(Op0, UndefV, Idxs);
// If the intrinsic has a mask operand, handle that.
if (CI->getNumArgOperands() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (!IsX86 && Name == "stackprotectorcheck") {
Rep = nullptr;
} else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
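
To make the new masks concrete, assume a hypothetical avx512.mask.insert call with DstNumElts = 8, SrcNumElts = 4 and Imm = 1 (so Scale = 2):

// Widen Op1: Idxs = {0, 1, 2, 3, 4, 4, 4, 4}
//            (indices >= SrcNumElts select undef lanes of UndefV)
// Insert:    Idxs = {0, 1, 2, 3, 8, 9, 10, 11}
//            (identity over Op0, then slot Imm rewritten to take lanes
//            DstNumElts..DstNumElts+3, i.e. the widened Op1's lanes 0..3)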

@@ -891,23 +891,17 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
AddStream, Cache);

// Partition numbers for ThinLTO jobs start at 1 (see comments for
// GlobalResolution in LTO.h). Task numbers, however, start at
// ParallelCodeGenParallelismLevel if an LTO module is present, as tasks 0
// through ParallelCodeGenParallelismLevel-1 are reserved for parallel code
// generation partitions.
// Task numbers start at ParallelCodeGenParallelismLevel if an LTO
// module is present, as tasks 0 through ParallelCodeGenParallelismLevel-1
// are reserved for parallel code generation partitions.
unsigned Task =
HasRegularLTO ? RegularLTO.ParallelCodeGenParallelismLevel : 0;
unsigned Partition = 1;

for (auto &Mod : ThinLTO.ModuleMap) {
if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first],
ExportLists[Mod.first],
ResolvedODR[Mod.first], ThinLTO.ModuleMap))
return E;

++Task;
++Partition;
}

return BackendProc->wait();
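
A hypothetical run with ParallelCodeGenParallelismLevel = 4, one regular LTO module, and two ThinLTO modules makes the numbering concrete:

// Tasks 0..3 -> reserved for the regular LTO parallel codegen partitions
// Module A   -> Task = 4, Partition = 1
// Module B   -> Task = 5, Partition = 2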

@@ -76,8 +76,12 @@ namespace llvm {
compile-time arithmetic on PPC double-double numbers, it is not able
to represent all possible values held by a PPC double-double number,
for example: (long double) 1.0 + (long double) 0x1p-106
Should this be replaced by a full emulation of PPC double-double? */
static const fltSemantics semPPCDoubleDouble = {0, 0, 0, 0};
Should this be replaced by a full emulation of PPC double-double?

Note: we need to make the value different from semBogus as otherwise
an unsafe optimization may collapse both values to a single address,
and we heavily rely on them having distinct addresses. */
static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};

/* There are temporary semantics for the real PPCDoubleDouble implementation.
Currently, APFloat of PPCDoubleDouble holds one PPCDoubleDoubleImpl as the

@@ -1069,6 +1069,7 @@ StringRef sys::getHostCPUName() {
.Case("POWER7", "pwr7")
.Case("POWER8", "pwr8")
.Case("POWER8E", "pwr8")
.Case("POWER8NVL", "pwr8")
.Case("POWER9", "pwr9")
.Default(generic);
}

@@ -239,10 +239,7 @@ void llvm::write_double(raw_ostream &S, double N, FloatStyle Style,
N *= 100.0;

char Buf[32];
unsigned Len;
Len = format(Spec.c_str(), N).snprint(Buf, sizeof(Buf));
if (Style == FloatStyle::Percent)
++Len;
format(Spec.c_str(), N).snprint(Buf, sizeof(Buf));
S << Buf;
if (Style == FloatStyle::Percent)
S << '%';

@@ -118,6 +118,18 @@ void Input::beginMapping() {
}
}

std::vector<StringRef> Input::keys() {
MapHNode *MN = dyn_cast<MapHNode>(CurrentNode);
std::vector<StringRef> Ret;
if (!MN) {
setError(CurrentNode, "not a mapping");
return Ret;
}
for (auto &P : MN->Mapping)
Ret.push_back(P.first());
return Ret;
}

bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault,
void *&SaveInfo) {
UseDefault = false;

@@ -163,7 +175,7 @@ void Input::endMapping() {
if (!MN)
return;
for (const auto &NN : MN->Mapping) {
if (!MN->isValidKey(NN.first())) {
if (!is_contained(MN->ValidKeys, NN.first())) {
setError(NN.second.get(), Twine("unknown key '") + NN.first() + "'");
break;
}

@@ -373,14 +385,6 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
}
}

bool Input::MapHNode::isValidKey(StringRef Key) {
for (const char *K : ValidKeys) {
if (Key.equals(K))
return true;
}
return false;
}

void Input::setError(const Twine &Message) {
this->setError(CurrentNode, Message);
}

@@ -451,6 +455,10 @@ void Output::endMapping() {
StateStack.pop_back();
}

std::vector<StringRef> Output::keys() {
report_fatal_error("invalid call");
}

bool Output::preflightKey(const char *Key, bool Required, bool SameAsDefault,
bool &UseDefault, void *&) {
UseDefault = false;

@@ -11,9 +11,15 @@
//
//===----------------------------------------------------------------------===//

#include "llvm/TableGen/StringMatcher.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/StringMatcher.h"
#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;

/// FindFirstNonCommonLetter - Find the first character in the keys of the

@@ -67,7 +73,7 @@ EmitStringMatcherForChar(const std::vector<const StringPair*> &Matches,
}

// Bucket the matches by the character we are comparing.
std::map<char, std::vector<const StringPair*> > MatchesByLetter;
std::map<char, std::vector<const StringPair*>> MatchesByLetter;

for (unsigned i = 0, e = Matches.size(); i != e; ++i)
MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]);

@@ -91,7 +97,7 @@ EmitStringMatcherForChar(const std::vector<const StringPair*> &Matches,
// FIXME: Need to escape general strings.
OS << Indent << "if (memcmp(" << StrVariableName << ".data()+" << CharNo
<< ", \"" << Matches[0]->first.substr(CharNo, NumChars) << "\", "
<< NumChars << "))\n";
<< NumChars << ") != 0)\n";
OS << Indent << " break;\n";
}

@@ -103,7 +109,7 @@ EmitStringMatcherForChar(const std::vector<const StringPair*> &Matches,
OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n";
OS << Indent << "default: break;\n";

for (std::map<char, std::vector<const StringPair*> >::iterator LI =
for (std::map<char, std::vector<const StringPair*>>::iterator LI =
MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) {
// TODO: escape hard stuff (like \n) if we ever care about it.
OS << Indent << "case '" << LI->first << "':\t // "

@@ -118,7 +124,6 @@ EmitStringMatcherForChar(const std::vector<const StringPair*> &Matches,
return true;
}

/// Emit - Top level entry point.
///
void StringMatcher::Emit(unsigned Indent) const {

@@ -126,7 +131,7 @@ void StringMatcher::Emit(unsigned Indent) const {
if (Matches.empty()) return;

// First level categorization: group strings by length.
std::map<unsigned, std::vector<const StringPair*> > MatchesByLength;
std::map<unsigned, std::vector<const StringPair*>> MatchesByLength;

for (unsigned i = 0, e = Matches.size(); i != e; ++i)
MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]);

@@ -136,7 +141,7 @@ void StringMatcher::Emit(unsigned Indent) const {
OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n";
OS.indent(Indent*2+2) << "default: break;\n";

for (std::map<unsigned, std::vector<const StringPair*> >::iterator LI =
for (std::map<unsigned, std::vector<const StringPair*>>::iterator LI =
MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) {
OS.indent(Indent*2+2) << "case " << LI->first << ":\t // "
<< LI->second.size()
|
||||
"Qualcomm Falkor processors", [
|
||||
FeatureCRC,
|
||||
FeatureCrypto,
|
||||
FeatureCustomCheapAsMoveHandling,
|
||||
FeatureFPARMv8,
|
||||
FeatureNEON,
|
||||
FeaturePerfMon
|
||||
FeaturePerfMon,
|
||||
FeaturePostRAScheduler,
|
||||
FeaturePredictableSelectIsExpensive,
|
||||
FeatureZCZeroing
|
||||
]>;
|
||||
|
||||
def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan",
|
||||
|
@ -76,7 +76,6 @@ public:
|
||||
void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
|
||||
void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
|
||||
|
||||
void EmitXRayTable();
|
||||
void EmitSled(const MachineInstr &MI, SledKind Kind);
|
||||
|
||||
/// \brief tblgen'erated driver function for lowering simple MI->MC
|
||||
@ -95,7 +94,7 @@ public:
|
||||
AArch64FI = F.getInfo<AArch64FunctionInfo>();
|
||||
STI = static_cast<const AArch64Subtarget*>(&F.getSubtarget());
|
||||
bool Result = AsmPrinter::runOnMachineFunction(F);
|
||||
EmitXRayTable();
|
||||
emitXRayTable();
|
||||
return Result;
|
||||
}
|
||||
|
||||
@ -150,59 +149,6 @@ void AArch64AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI)
|
||||
EmitSled(MI, SledKind::TAIL_CALL);
|
||||
}
|
||||
|
||||
void AArch64AsmPrinter::EmitXRayTable()
|
||||
{
|
||||
//TODO: merge the logic for ELF XRay sleds at a higher level, so to avoid
|
||||
// code duplication as it is now for x86_64, ARM32 and AArch64.
|
||||
if (Sleds.empty())
|
||||
return;
|
||||
|
||||
auto PrevSection = OutStreamer->getCurrentSectionOnly();
|
||||
auto Fn = MF->getFunction();
|
||||
MCSection *Section;
|
||||
|
||||
if (STI->isTargetELF()) {
|
||||
if (Fn->hasComdat())
|
||||
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
|
||||
ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
|
||||
Fn->getComdat()->getName());
|
||||
else
|
||||
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
|
||||
ELF::SHF_ALLOC);
|
||||
} else if (STI->isTargetMachO()) {
|
||||
Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
|
||||
SectionKind::getReadOnlyWithRel());
|
||||
} else {
|
||||
llvm_unreachable("Unsupported target");
|
||||
}
|
||||
|
||||
// Before we switch over, we force a reference to a label inside the
|
||||
// xray_instr_map section. Since EmitXRayTable() is always called just
|
||||
// before the function's end, we assume that this is happening after the
|
||||
// last return instruction.
|
||||
//
|
||||
// We then align the reference to 16 byte boundaries, which we determined
|
||||
// experimentally to be beneficial to avoid causing decoder stalls.
|
||||
MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true);
|
||||
OutStreamer->EmitCodeAlignment(16);
|
||||
OutStreamer->EmitSymbolValue(Tmp, 8, false);
|
||||
OutStreamer->SwitchSection(Section);
|
||||
OutStreamer->EmitLabel(Tmp);
|
||||
for (const auto &Sled : Sleds) {
|
||||
OutStreamer->EmitSymbolValue(Sled.Sled, 8);
|
||||
OutStreamer->EmitSymbolValue(CurrentFnSym, 8);
|
||||
auto Kind = static_cast<uint8_t>(Sled.Kind);
|
||||
OutStreamer->EmitBytes(
|
||||
StringRef(reinterpret_cast<const char *>(&Kind), 1));
|
||||
OutStreamer->EmitBytes(
|
||||
StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
|
||||
OutStreamer->EmitZeros(14);
|
||||
}
|
||||
OutStreamer->SwitchSection(PrevSection);
|
||||
|
||||
Sleds.clear();
|
||||
}
|
||||
|
||||
void AArch64AsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
|
||||
{
|
||||
static const int8_t NoopsInSledCount = 7;
|
||||
|
@ -1470,6 +1470,9 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
|
||||
bool IsUnscaled = TII->isUnscaledLdSt(MI);
|
||||
int Offset = getLdStOffsetOp(MI).getImm();
|
||||
int OffsetStride = IsUnscaled ? getMemScale(MI) : 1;
|
||||
// Allow one more for offset.
|
||||
if (Offset > 0)
|
||||
Offset -= OffsetStride;
|
||||
if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
|
||||
return false;
|
||||
|
||||
|
@ -3048,6 +3048,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
NODE_NAME_CASE(KILL)
|
||||
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
|
||||
NODE_NAME_CASE(SENDMSG)
|
||||
NODE_NAME_CASE(SENDMSGHALT)
|
||||
NODE_NAME_CASE(INTERP_MOV)
|
||||
NODE_NAME_CASE(INTERP_P1)
|
||||
NODE_NAME_CASE(INTERP_P2)
|
||||
|
@ -313,6 +313,7 @@ enum NodeType : unsigned {
|
||||
/// Pointer to the start of the shader's constant data.
|
||||
CONST_DATA_PTR,
|
||||
SENDMSG,
|
||||
SENDMSGHALT,
|
||||
INTERP_MOV,
|
||||
INTERP_P1,
|
||||
INTERP_P2,
|
||||
|
@ -266,6 +266,10 @@ def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG",
|
||||
SDTypeProfile<0, 1, [SDTCisInt<0>]>,
|
||||
[SDNPHasChain, SDNPInGlue]>;
|
||||
|
||||
def AMDGPUsendmsghalt : SDNode<"AMDGPUISD::SENDMSGHALT",
|
||||
SDTypeProfile<0, 1, [SDTCisInt<0>]>,
|
||||
[SDNPHasChain, SDNPInGlue]>;
|
||||
|
||||
def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
|
||||
SDTypeProfile<1, 3, [SDTCisFP<0>]>,
|
||||
[SDNPInGlue]>;
|
||||
|
@ -2706,12 +2706,19 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
||||
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
||||
|
||||
switch (IntrinsicID) {
|
||||
case AMDGPUIntrinsic::SI_sendmsg: {
|
||||
case AMDGPUIntrinsic::SI_sendmsg:
|
||||
case Intrinsic::amdgcn_s_sendmsg: {
|
||||
Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
|
||||
SDValue Glue = Chain.getValue(1);
|
||||
return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain,
|
||||
Op.getOperand(2), Glue);
|
||||
}
|
||||
case Intrinsic::amdgcn_s_sendmsghalt: {
|
||||
Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
|
||||
SDValue Glue = Chain.getValue(1);
|
||||
return DAG.getNode(AMDGPUISD::SENDMSGHALT, DL, MVT::Other, Chain,
|
||||
Op.getOperand(2), Glue);
|
||||
}
|
||||
case AMDGPUIntrinsic::SI_tbuffer_store: {
|
||||
SDValue Ops[] = {
|
||||
Chain,
|
||||
|
@ -504,7 +504,7 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
|
||||
return;
|
||||
|
||||
// There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
|
||||
if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
|
||||
if (LastInstWritesM0 && (I->getOpcode() == AMDGPU::S_SENDMSG || I->getOpcode() == AMDGPU::S_SENDMSGHALT)) {
|
||||
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
|
||||
LastInstWritesM0 = false;
|
||||
return;
|
||||
@ -619,7 +619,8 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
|
||||
// signalling other hardware blocks
|
||||
if ((I->getOpcode() == AMDGPU::S_BARRIER &&
|
||||
ST->needWaitcntBeforeBarrier()) ||
|
||||
I->getOpcode() == AMDGPU::S_SENDMSG)
|
||||
I->getOpcode() == AMDGPU::S_SENDMSG ||
|
||||
I->getOpcode() == AMDGPU::S_SENDMSGHALT)
|
||||
Required = LastIssued;
|
||||
else
|
||||
Required = handleOperands(*I);
|
||||
|
@ -828,9 +828,12 @@ let Uses = [EXEC, M0] in {
|
||||
def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
|
||||
[(AMDGPUsendmsg (i32 imm:$simm16))]
|
||||
>;
|
||||
|
||||
def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16",
|
||||
[(AMDGPUsendmsghalt (i32 imm:$simm16))]
|
||||
>;
|
||||
} // End Uses = [EXEC, M0]
|
||||
|
||||
def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16">;
|
||||
def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;
|
||||
def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
|
||||
let simm16 = 0;
|
||||
|

@@ -164,9 +164,6 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit the rest of the function body.
EmitFunctionBody();

// Emit the XRay table for this function.
EmitXRayTable();

// If we need V4T thumb mode Register Indirect Jump pads, emit them.
// These are created per function, rather than per TU, since it's
// relatively easy to exceed the thumb branch range within a TU.

@@ -113,9 +113,6 @@ public:
void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
// Helper function that emits the XRay sleds we've collected for a particular
// function.
void EmitXRayTable();

private:
void EmitSled(const MachineInstr &MI, SledKind Kind);

@@ -22,9 +22,6 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
using namespace llvm;

@@ -226,38 +223,3 @@ void ARMAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI)
{
EmitSled(MI, SledKind::TAIL_CALL);
}

void ARMAsmPrinter::EmitXRayTable()
{
if (Sleds.empty())
return;

MCSection *Section = nullptr;
if (Subtarget->isTargetELF()) {
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_GROUP |
ELF::SHF_MERGE,
0, CurrentFnSym->getName());
} else if (Subtarget->isTargetMachO()) {
Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
SectionKind::getReadOnlyWithRel());
} else {
llvm_unreachable("Unsupported target");
}

auto PrevSection = OutStreamer->getCurrentSectionOnly();
OutStreamer->SwitchSection(Section);
for (const auto &Sled : Sleds) {
OutStreamer->EmitSymbolValue(Sled.Sled, 4);
OutStreamer->EmitSymbolValue(CurrentFnSym, 4);
auto Kind = static_cast<uint8_t>(Sled.Kind);
OutStreamer->EmitBytes(
StringRef(reinterpret_cast<const char *>(&Kind), 1));
OutStreamer->EmitBytes(
StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
OutStreamer->EmitZeros(6);
}
OutStreamer->SwitchSection(PrevSection);

Sleds.clear();
}
|
||||
//
|
||||
// The code below is intended to be fully target-independent.
|
||||
|
||||
#include "BitTracker.h"
|
||||
#include "llvm/ADT/APInt.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineOperand.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
|
||||
#include "BitTracker.h"
|
||||
#include <iterator>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
typedef BitTracker BT;
|
||||
|
||||
namespace {
|
||||
|
||||
// Local trickery to pretty print a register (without the whole "%vreg"
|
||||
// business).
|
||||
struct printv {
|
||||
printv(unsigned r) : R(r) {}
|
||||
|
||||
unsigned R;
|
||||
};
|
||||
|
||||
raw_ostream &operator<< (raw_ostream &OS, const printv &PV) {
|
||||
if (PV.R)
|
||||
OS << 'v' << TargetRegisterInfo::virtReg2Index(PV.R);
|
||||
@ -82,9 +90,11 @@ namespace {
|
||||
OS << 's';
|
||||
return OS;
|
||||
}
|
||||
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
namespace llvm {
|
||||
|
||||
raw_ostream &operator<<(raw_ostream &OS, const BT::BitValue &BV) {
|
||||
switch (BV.Type) {
|
||||
case BT::BitValue::Top:
|
||||
@ -167,14 +177,14 @@ namespace llvm {
|
||||
|
||||
return OS;
|
||||
}
|
||||
}
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
void BitTracker::print_cells(raw_ostream &OS) const {
|
||||
for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I)
|
||||
dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n";
|
||||
}
|
||||
|
||||
|
||||
BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F)
|
||||
: Trace(false), ME(E), MF(F), MRI(F.getRegInfo()), Map(*new CellMapType) {}
|
||||
|
||||
@ -182,7 +192,6 @@ BitTracker::~BitTracker() {
delete &Map;
}

// If we were allowed to update a cell for a part of a register, the meet
// operation would need to be parametrized by the register number and the
// exact part of the register, so that the computer BitRefs correspond to

@ -201,7 +210,6 @@ bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) {
return Changed;
}

// Insert the entire cell RC into the current cell at position given by M.
BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC,
const BitMask &M) {

@ -224,7 +232,6 @@ BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC,
return *this;
}

BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const {
uint16_t B = M.first(), E = M.last(), W = width();
assert(B < W && E < W);

@ -243,7 +250,6 @@ BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const {
return RC;
}

BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) {
// Rotate left (i.e. towards increasing bit indices).
// Swap the two parts: [0..W-Sh-1] [W-Sh..W-1]

@ -265,7 +271,6 @@ BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) {
return *this;
}

BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E,
const BitValue &V) {
assert(B <= E);

@ -274,7 +279,6 @@ BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E,
return *this;
}

BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) {
// Append the cell given as the argument to the "this" cell.
// Bit 0 of RC becomes bit W of the result, where W is this->width().

@ -285,7 +289,6 @@ BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) {
return *this;
}

uint16_t BT::RegisterCell::ct(bool B) const {
uint16_t W = width();
uint16_t C = 0;

@ -295,7 +298,6 @@ uint16_t BT::RegisterCell::ct(bool B) const {
return C;
}

uint16_t BT::RegisterCell::cl(bool B) const {
uint16_t W = width();
uint16_t C = 0;

@ -305,7 +307,6 @@ uint16_t BT::RegisterCell::cl(bool B) const {
return C;
}

bool BT::RegisterCell::operator== (const RegisterCell &RC) const {
uint16_t W = Bits.size();
if (RC.Bits.size() != W)

@ -316,7 +317,6 @@ bool BT::RegisterCell::operator== (const RegisterCell &RC) const {
return true;
}

uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const {
// The general problem is with finding a register class that corresponds
// to a given reference reg:sub. There can be several such classes, and

@ -342,7 +342,6 @@ uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const {
return BW;
}

BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR,
const CellMapType &M) const {
uint16_t BW = getRegBitWidth(RR);

@ -370,7 +369,6 @@ BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR,
return RegisterCell::top(BW);
}

void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC,
CellMapType &M) const {
// While updating the cell map can be done in a meaningful way for

@ -388,7 +386,6 @@ void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC,
M[RR.Reg] = RC;
}

// Check if the cell represents a compile-time integer value.
bool BT::MachineEvaluator::isInt(const RegisterCell &A) const {
uint16_t W = A.width();

@ -398,7 +395,6 @@ bool BT::MachineEvaluator::isInt(const RegisterCell &A) const {
return true;
}

// Convert a cell to the integer value. The result must fit in uint64_t.
uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const {
assert(isInt(A));

@ -411,7 +407,6 @@ uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const {
return Val;
}

// Evaluator helper functions. These implement some common operation on
// register cells that can be used to implement target-specific instructions
// in a target-specific evaluator.

@ -426,7 +421,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(int64_t V, uint16_t W) const {
return Res;
}

BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const {
const APInt &A = CI->getValue();
uint16_t BW = A.getBitWidth();

@ -437,7 +431,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const {
return Res;
}

BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();

@ -471,7 +464,6 @@ BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1,
return Res;
}

BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();

@ -505,29 +497,26 @@ BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1,
return Res;
}

BT::RegisterCell BT::MachineEvaluator::eMLS(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width() + A2.width();
uint16_t Z = A1.ct(0) + A2.ct(0);
uint16_t Z = A1.ct(false) + A2.ct(false);
RegisterCell Res(W);
Res.fill(0, Z, BitValue::Zero);
Res.fill(Z, W, BitValue::self());
return Res;
}

BT::RegisterCell BT::MachineEvaluator::eMLU(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width() + A2.width();
uint16_t Z = A1.ct(0) + A2.ct(0);
uint16_t Z = A1.ct(false) + A2.ct(false);
RegisterCell Res(W);
Res.fill(0, Z, BitValue::Zero);
Res.fill(Z, W, BitValue::self());
return Res;
}

BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1,
uint16_t Sh) const {
assert(Sh <= A1.width());

@ -537,7 +526,6 @@ BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1,
return Res;
}

BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1,
uint16_t Sh) const {
uint16_t W = A1.width();

@ -548,7 +536,6 @@ BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1,
return Res;
}

BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1,
uint16_t Sh) const {
uint16_t W = A1.width();

@ -560,7 +547,6 @@ BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1,
return Res;
}

BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();

@ -583,7 +569,6 @@ BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1,
return Res;
}

BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();

@ -606,7 +591,6 @@ BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1,
return Res;
}

BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();

@ -627,7 +611,6 @@ BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1,
return Res;
}

BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const {
uint16_t W = A1.width();
RegisterCell Res(W);

@ -643,7 +626,6 @@ BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const {
return Res;
}

BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1,
uint16_t BitN) const {
assert(BitN < A1.width());

@ -652,7 +634,6 @@ BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1,
return Res;
}

BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1,
uint16_t BitN) const {
assert(BitN < A1.width());

@ -661,7 +642,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1,
return Res;
}

BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B,
uint16_t W) const {
uint16_t C = A1.cl(B), AW = A1.width();

@ -672,7 +652,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B,
return RegisterCell::self(0, W);
}

BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B,
uint16_t W) const {
uint16_t C = A1.ct(B), AW = A1.width();

@ -683,7 +662,6 @@ BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B,
return RegisterCell::self(0, W);
}

BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1,
uint16_t FromN) const {
uint16_t W = A1.width();

@ -695,7 +673,6 @@ BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1,
return Res;
}

BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1,
uint16_t FromN) const {
uint16_t W = A1.width();

@ -705,7 +682,6 @@ BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1,
return Res;
}

BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1,
uint16_t B, uint16_t E) const {
uint16_t W = A1.width();

@ -718,7 +694,6 @@ BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1,
return Res;
}

BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1,
const RegisterCell &A2, uint16_t AtN) const {
uint16_t W1 = A1.width(), W2 = A2.width();

@ -731,7 +706,6 @@ BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1,
return Res;
}

BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const {
assert(Sub == 0 && "Generic BitTracker::mask called for Sub != 0");
uint16_t W = getRegBitWidth(Reg);

@ -785,7 +759,6 @@ bool BT::MachineEvaluator::evaluate(const MachineInstr &MI,
return true;
}

// Main W-Z implementation.

void BT::visitPHI(const MachineInstr &PI) {

@ -977,7 +950,6 @@ void BT::visitBranchesFrom(const MachineInstr &BI) {
}
}

void BT::visitUsesOf(unsigned Reg) {
if (Trace)
dbgs() << "visiting uses of " << PrintReg(Reg, &ME.TRI) << "\n";

@ -997,17 +969,14 @@ void BT::visitUsesOf(unsigned Reg) {
}
}

BT::RegisterCell BT::get(RegisterRef RR) const {
return ME.getCell(RR, Map);
}

void BT::put(RegisterRef RR, const RegisterCell &RC) {
ME.putCell(RR, RC, Map);
}

// Replace all references to bits from OldRR with the corresponding bits
// in NewRR.
void BT::subst(RegisterRef OldRR, RegisterRef NewRR) {

@ -1033,7 +1002,6 @@ void BT::subst(RegisterRef OldRR, RegisterRef NewRR) {
}
}

// Check if the block has been "executed" during propagation. (If not, the
// block is dead, but it may still appear to be reachable.)
bool BT::reached(const MachineBasicBlock *B) const {

@ -1047,7 +1015,6 @@ bool BT::reached(const MachineBasicBlock *B) const {
return false;
}

// Visit an individual instruction. This could be a newly added instruction,
// or one that has been modified by an optimization.
void BT::visit(const MachineInstr &MI) {

@ -1061,14 +1028,12 @@ void BT::visit(const MachineInstr &MI) {
FlowQ.pop();
}

void BT::reset() {
EdgeExec.clear();
InstrExec.clear();
Map.clear();
}

void BT::run() {
reset();
assert(FlowQ.empty());

@ -1141,4 +1106,3 @@ void BT::run() {
if (Trace)
print_cells(dbgs() << "Cells after propagation:\n");
}

@ -1,4 +1,4 @@
//===--- BitTracker.h -----------------------------------------------------===//
//===--- BitTracker.h -------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//

@ -7,24 +7,27 @@
//
//===----------------------------------------------------------------------===//

#ifndef BITTRACKER_H
#define BITTRACKER_H
#ifndef LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H
#define LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H

#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"

#include "llvm/CodeGen/MachineOperand.h"
#include <cassert>
#include <cstdint>
#include <map>
#include <queue>
#include <set>
#include <utility>

namespace llvm {
class ConstantInt;
class MachineRegisterInfo;
class MachineBasicBlock;
class MachineInstr;
class MachineOperand;
class raw_ostream;

class ConstantInt;
class MachineRegisterInfo;
class MachineBasicBlock;
class MachineInstr;
class raw_ostream;

struct BitTracker {
struct BitRef;

@ -76,19 +79,19 @@ private:
CellMapType &Map;
};

// Abstraction of a reference to bit at position Pos from a register Reg.
struct BitTracker::BitRef {
BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {}

bool operator== (const BitRef &BR) const {
// If Reg is 0, disregard Pos.
return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos);
}

unsigned Reg;
uint16_t Pos;
};

// Abstraction of a register reference in MachineOperand. It contains the
// register number and the subregister index.
struct BitTracker::RegisterRef {

@ -96,10 +99,10 @@ struct BitTracker::RegisterRef {
: Reg(R), Sub(S) {}
RegisterRef(const MachineOperand &MO)
: Reg(MO.getReg()), Sub(MO.getSubReg()) {}

unsigned Reg, Sub;
};

// Value that a single bit can take. This is outside of the context of
// any register, it is more of an abstraction of the two-element set of
// possible bit values. One extension here is the "Ref" type, which

@ -158,6 +161,7 @@ struct BitTracker::BitValue {
bool operator!= (const BitValue &V) const {
return !operator==(V);
}

bool is(unsigned T) const {
assert(T == 0 || T == 1);
return T == 0 ? Type == Zero

@ -209,6 +213,7 @@ struct BitTracker::BitValue {
bool num() const {
return Type == Zero || Type == One;
}

operator bool() const {
assert(Type == Zero || Type == One);
return Type == One;

@ -217,7 +222,6 @@ struct BitTracker::BitValue {
friend raw_ostream &operator<<(raw_ostream &OS, const BitValue &BV);
};

// This operation must be idempotent, i.e. ref(ref(V)) == ref(V).
inline BitTracker::BitValue
BitTracker::BitValue::ref(const BitValue &V) {

@ -228,25 +232,25 @@ BitTracker::BitValue::ref(const BitValue &V) {
return self();
}

inline BitTracker::BitValue
BitTracker::BitValue::self(const BitRef &Self) {
return BitValue(Self.Reg, Self.Pos);
}

// A sequence of bits starting from index B up to and including index E.
// If E < B, the mask represents two sections: [0..E] and [B..W) where
// W is the width of the register.
struct BitTracker::BitMask {
BitMask() : B(0), E(0) {}
BitMask() = default;
BitMask(uint16_t b, uint16_t e) : B(b), E(e) {}

uint16_t first() const { return B; }
uint16_t last() const { return E; }
private:
uint16_t B, E;
};

private:
uint16_t B = 0;
uint16_t E = 0;
};

// Representation of a register: a list of BitValues.
struct BitTracker::RegisterCell {

@ -255,6 +259,7 @@ struct BitTracker::RegisterCell {
uint16_t width() const {
return Bits.size();
}

const BitValue &operator[](uint16_t BitN) const {
assert(BitN < Bits.size());
return Bits[BitN];

@ -297,12 +302,10 @@ private:
friend raw_ostream &operator<<(raw_ostream &OS, const RegisterCell &RC);
};

inline bool BitTracker::has(unsigned Reg) const {
return Map.find(Reg) != Map.end();
}

inline const BitTracker::RegisterCell&
BitTracker::lookup(unsigned Reg) const {
CellMapType::const_iterator F = Map.find(Reg);

@ -310,7 +313,6 @@ BitTracker::lookup(unsigned Reg) const {
return F->second;
}

inline BitTracker::RegisterCell
BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) {
RegisterCell RC(Width);

@ -319,7 +321,6 @@ BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) {
return RC;
}

inline BitTracker::RegisterCell
BitTracker::RegisterCell::top(uint16_t Width) {
RegisterCell RC(Width);

@ -328,7 +329,6 @@ BitTracker::RegisterCell::top(uint16_t Width) {
return RC;
}

inline BitTracker::RegisterCell
BitTracker::RegisterCell::ref(const RegisterCell &C) {
uint16_t W = C.width();

@ -345,12 +345,13 @@ BitTracker::RegisterCell::ref(const RegisterCell &C) {
struct BitTracker::MachineEvaluator {
MachineEvaluator(const TargetRegisterInfo &T, MachineRegisterInfo &M)
: TRI(T), MRI(M) {}
virtual ~MachineEvaluator() {}
virtual ~MachineEvaluator() = default;

uint16_t getRegBitWidth(const RegisterRef &RR) const;

RegisterCell getCell(const RegisterRef &RR, const CellMapType &M) const;
void putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const;

// A result of any operation should use refs to the source cells, not
// the cells directly. This function is a convenience wrapper to quickly
// generate a ref for a cell corresponding to a register reference.

@ -435,4 +436,4 @@ struct BitTracker::MachineEvaluator {

} // end namespace llvm

#endif
#endif // LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H

@ -7,16 +7,30 @@
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#include "Hexagon.h"
#include "HexagonBitTracker.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonTargetMachine.h"
#include "HexagonBitTracker.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <utility>
#include <vector>

using namespace llvm;

@ -76,11 +90,11 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri,
}
}

BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
using namespace Hexagon;

if (Sub == 0)
return MachineEvaluator::mask(Reg, 0);
using namespace Hexagon;
const TargetRegisterClass *RC = MRI.getRegClass(Reg);
unsigned ID = RC->getID();
uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub));

@ -102,6 +116,7 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
}

namespace {

class RegisterRefs {
std::vector<BT::RegisterRef> Vector;

@ -117,17 +132,21 @@ public:
}

size_t size() const { return Vector.size(); }

const BT::RegisterRef &operator[](unsigned n) const {
// The main purpose of this operator is to assert with bad argument.
assert(n < Vector.size());
return Vector[n];
}
};
}

} // end anonymous namespace

bool HexagonEvaluator::evaluate(const MachineInstr &MI,
const CellMapType &Inputs,
CellMapType &Outputs) const {
using namespace Hexagon;

unsigned NumDefs = 0;

// Sanity verification: there should not be any defs with subregisters.

@ -142,7 +161,6 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
if (NumDefs == 0)
return false;

using namespace Hexagon;
unsigned Opc = MI.getOpcode();

if (MI.mayLoad()) {

@ -779,10 +797,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
case S2_cl0:
case S2_cl0p:
// Always produce a 32-bit result.
return rr0(eCLB(rc(1), 0/*bit*/, 32), Outputs);
return rr0(eCLB(rc(1), false/*bit*/, 32), Outputs);
case S2_cl1:
case S2_cl1p:
return rr0(eCLB(rc(1), 1/*bit*/, 32), Outputs);
return rr0(eCLB(rc(1), true/*bit*/, 32), Outputs);
case S2_clb:
case S2_clbp: {
uint16_t W1 = getRegBitWidth(Reg[1]);

@ -794,10 +812,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
}
case S2_ct0:
case S2_ct0p:
return rr0(eCTB(rc(1), 0/*bit*/, 32), Outputs);
return rr0(eCTB(rc(1), false/*bit*/, 32), Outputs);
case S2_ct1:
case S2_ct1p:
return rr0(eCTB(rc(1), 1/*bit*/, 32), Outputs);
return rr0(eCTB(rc(1), true/*bit*/, 32), Outputs);
case S5_popcountp:
// TODO
break;

@ -953,6 +971,8 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI,
bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
const CellMapType &Inputs,
CellMapType &Outputs) const {
using namespace Hexagon;

if (TII.isPredicated(MI))
return false;
assert(MI.mayLoad() && "A load that mayn't?");

@ -960,7 +980,6 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,

uint16_t BitNum;
bool SignEx;
using namespace Hexagon;

switch (Opc) {
default:

@ -1141,9 +1160,9 @@ bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr &MI,
return true;
}

unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const {
using namespace Hexagon;

bool Is64 = DoubleRegsRegClass.contains(PReg);
assert(PReg == 0 || Is64 || IntRegsRegClass.contains(PReg));

@ -1180,7 +1199,6 @@ unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const {
return (Idx64+1 < Num64) ? Phys64[Idx64+1] : 0;
}

unsigned HexagonEvaluator::getVirtRegFor(unsigned PReg) const {
typedef MachineRegisterInfo::livein_iterator iterator;
for (iterator I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) {

@ -1,4 +1,4 @@
//===--- HexagonBitTracker.h ----------------------------------------------===//
//===--- HexagonBitTracker.h ------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//

@ -7,15 +7,17 @@
//
//===----------------------------------------------------------------------===//

#ifndef HEXAGONBITTRACKER_H
#define HEXAGONBITTRACKER_H
#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H
#define LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H

#include "BitTracker.h"
#include "llvm/ADT/DenseMap.h"
#include <cstdint>

namespace llvm {
class HexagonInstrInfo;
class HexagonRegisterInfo;

class HexagonInstrInfo;
class HexagonRegisterInfo;

struct HexagonEvaluator : public BitTracker::MachineEvaluator {
typedef BitTracker::CellMapType CellMapType;

@ -49,10 +51,12 @@ private:
// Type of formal parameter extension.
struct ExtType {
enum { SExt, ZExt };
char Type;
uint16_t Width;
ExtType() : Type(0), Width(0) {}

ExtType() = default;
ExtType(char t, uint16_t w) : Type(t), Width(w) {}

char Type = 0;
uint16_t Width = 0;
};
// Map VR -> extension type.
typedef DenseMap<unsigned, ExtType> RegExtMap;

@ -61,4 +65,4 @@ private:

} // end namespace llvm

#endif
#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H
File diff suppressed because it is too large
@ -16,9 +16,14 @@
|
||||
|
||||
#include "HexagonRegisterInfo.h"
|
||||
#include "MCTargetDesc/HexagonBaseInfo.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
|
||||
#include "llvm/Target/TargetFrameLowering.h"
|
||||
#include "llvm/CodeGen/MachineValueType.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#define GET_INSTRINFO_HEADER
|
||||
#include "HexagonGenInstrInfo.inc"
|
||||
@ -29,9 +34,10 @@ struct EVT;
|
||||
class HexagonSubtarget;
|
||||
|
||||
class HexagonInstrInfo : public HexagonGenInstrInfo {
|
||||
virtual void anchor();
|
||||
const HexagonRegisterInfo RI;
|
||||
|
||||
virtual void anchor();
|
||||
|
||||
public:
|
||||
explicit HexagonInstrInfo(HexagonSubtarget &ST);
|
||||
|
||||
@ -260,7 +266,7 @@ public:
|
||||
/// PredCost.
|
||||
unsigned getInstrLatency(const InstrItineraryData *ItinData,
|
||||
const MachineInstr &MI,
|
||||
unsigned *PredCost = 0) const override;
|
||||
unsigned *PredCost = nullptr) const override;
|
||||
|
||||
/// Create machine specific model for scheduling.
|
||||
DFAPacketizer *
|
||||
@ -378,7 +384,6 @@ public:
|
||||
bool PredOpcodeHasJMP_c(unsigned Opcode) const;
|
||||
bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const;
|
||||
|
||||
|
||||
short getAbsoluteForm(const MachineInstr &MI) const;
|
||||
unsigned getAddrMode(const MachineInstr &MI) const;
|
||||
unsigned getBaseAndOffset(const MachineInstr &MI, int &Offset,
|
||||
@ -421,13 +426,11 @@ public:
|
||||
unsigned getUnits(const MachineInstr &MI) const;
|
||||
unsigned getValidSubTargets(const unsigned Opcode) const;
|
||||
|
||||
|
||||
/// getInstrTimingClassLatency - Compute the instruction latency of a given
|
||||
/// instruction using Timing Class information, if available.
|
||||
unsigned nonDbgBBSize(const MachineBasicBlock *BB) const;
|
||||
unsigned nonDbgBundleSize(MachineBasicBlock::const_iterator BundleHead) const;
|
||||
|
||||
|
||||
void immediateExtend(MachineInstr &MI) const;
|
||||
bool invertAndChangeJumpTarget(MachineInstr &MI,
|
||||
MachineBasicBlock* NewTarget) const;
|
||||
@ -438,6 +441,6 @@ public:
|
||||
short xformRegToImmOffset(const MachineInstr &MI) const;
|
||||
};
|
||||
|
||||
}
|
||||
} // end namespace llvm
|
||||
|
||||
#endif
|
||||
#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H
|
||||
|
@ -15,33 +15,31 @@
|
||||
|
||||
namespace llvm {
|
||||
|
||||
namespace Hexagon {
|
||||
namespace Hexagon {
|
||||
|
||||
const unsigned int StartPacket = 0x1;
|
||||
const unsigned int EndPacket = 0x2;
|
||||
}
|
||||
|
||||
} // end namespace Hexagon
|
||||
|
||||
/// Hexagon target-specific information for each MachineFunction.
|
||||
class HexagonMachineFunctionInfo : public MachineFunctionInfo {
|
||||
// SRetReturnReg - Some subtargets require that sret lowering includes
|
||||
// returning the value of the returned struct in a register. This field
|
||||
// holds the virtual register into which the sret argument is passed.
|
||||
unsigned SRetReturnReg;
|
||||
unsigned StackAlignBaseVReg; // Aligned-stack base register (virtual)
|
||||
unsigned StackAlignBasePhysReg; // (physical)
|
||||
unsigned SRetReturnReg = 0;
|
||||
unsigned StackAlignBaseVReg = 0; // Aligned-stack base register (virtual)
|
||||
unsigned StackAlignBasePhysReg = 0; // (physical)
|
||||
int VarArgsFrameIndex;
|
||||
bool HasClobberLR;
|
||||
bool HasEHReturn;
|
||||
bool HasClobberLR = false;
|
||||
bool HasEHReturn = false;
|
||||
std::map<const MachineInstr*, unsigned> PacketInfo;
|
||||
virtual void anchor();
|
||||
|
||||
public:
|
||||
HexagonMachineFunctionInfo() : SRetReturnReg(0), StackAlignBaseVReg(0),
|
||||
StackAlignBasePhysReg(0), HasClobberLR(0), HasEHReturn(false) {}
|
||||
HexagonMachineFunctionInfo() = default;
|
||||
|
||||
HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0),
|
||||
StackAlignBaseVReg(0), StackAlignBasePhysReg(0), HasClobberLR(0),
|
||||
HasEHReturn(false) {}
|
||||
HexagonMachineFunctionInfo(MachineFunction &MF) {}
|
||||
|
||||
unsigned getSRetReturnReg() const { return SRetReturnReg; }
|
||||
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
|
||||
@ -75,6 +73,7 @@ public:
|
||||
void setStackAlignBasePhysReg(unsigned R) { StackAlignBasePhysReg = R; }
|
||||
unsigned getStackAlignBasePhysReg() const { return StackAlignBasePhysReg; }
|
||||
};
|
||||
} // End llvm namespace
|
||||
|
||||
#endif
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H
|
||||
|
@ -10,17 +10,27 @@
|
||||
// This file contains the declarations of the HexagonTargetAsmInfo properties.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "hexagon-sdata"
|
||||
|
||||
#include "HexagonTargetMachine.h"
|
||||
#include "HexagonTargetObjectFile.h"
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/GlobalObject.h"
|
||||
#include "llvm/IR/GlobalValue.h"
|
||||
#include "llvm/IR/GlobalVariable.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/SectionKind.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ELF.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
@ -44,13 +54,21 @@ static cl::opt<bool> TraceGVPlacement("trace-gv-placement",
|
||||
// (e.g. -debug and -debug-only=globallayout)
|
||||
#define TRACE_TO(s, X) s << X
|
||||
#ifdef NDEBUG
|
||||
#define TRACE(X) do { if (TraceGVPlacement) { TRACE_TO(errs(), X); } } while (0)
|
||||
#define TRACE(X) \
|
||||
do { \
|
||||
if (TraceGVPlacement) { \
|
||||
TRACE_TO(errs(), X); \
|
||||
} \
|
||||
} while (false)
|
||||
#else
|
||||
#define TRACE(X) \
|
||||
do { \
|
||||
if (TraceGVPlacement) { TRACE_TO(errs(), X); } \
|
||||
else { DEBUG( TRACE_TO(dbgs(), X) ); } \
|
||||
} while (0)
|
||||
#define TRACE(X) \
|
||||
do { \
|
||||
if (TraceGVPlacement) { \
|
||||
TRACE_TO(errs(), X); \
|
||||
} else { \
|
||||
DEBUG(TRACE_TO(dbgs(), X)); \
|
||||
} \
|
||||
} while (false)
|
||||
#endif
|
||||
|
||||
// Returns true if the section name is such that the symbol will be put
|
||||
@ -69,7 +87,6 @@ static bool isSmallDataSection(StringRef Sec) {
|
||||
Sec.find(".scommon.") != StringRef::npos;
|
||||
}
|
||||
|
||||
|
||||
static const char *getSectionSuffixForSize(unsigned Size) {
|
||||
switch (Size) {
|
||||
default:
|
||||
@ -163,7 +180,6 @@ MCSection *HexagonTargetObjectFile::getExplicitSectionGlobal(
|
||||
return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, Kind, TM);
|
||||
}
|
||||
|
||||
|
||||
/// Return true if this global value should be placed into small data/bss
|
||||
/// section.
|
||||
bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
|
||||
@ -232,17 +248,14 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool HexagonTargetObjectFile::isSmallDataEnabled() const {
|
||||
return SmallDataThreshold > 0;
|
||||
}
|
||||
|
||||
|
||||
unsigned HexagonTargetObjectFile::getSmallDataSize() const {
|
||||
return SmallDataThreshold;
|
||||
}
|
||||
|
||||
|
||||
/// Descends any type down to "elementary" components,
|
||||
/// discovering the smallest addressable one.
|
||||
/// If zero is returned, declaration will not be modified.
|
||||
|
@ -1,5 +1,4 @@
|
||||
|
||||
//=== HexagonMCCompound.cpp - Hexagon Compound checker -------===//
|
||||
//=== HexagonMCCompound.cpp - Hexagon Compound checker -------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@ -11,18 +10,17 @@
|
||||
// This file is looks at a packet and tries to form compound insns
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "Hexagon.h"
|
||||
#include "MCTargetDesc/HexagonBaseInfo.h"
|
||||
#include "MCTargetDesc/HexagonMCShuffler.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/MC/MCAssembler.h"
|
||||
#include "MCTargetDesc/HexagonMCInstrInfo.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/MC/MCSectionELF.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/MC/MCSymbol.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
|
||||
using namespace llvm;
|
||||
using namespace Hexagon;
|
||||
@ -79,8 +77,7 @@ static const unsigned cmpgtn1BitOpcode[8] = {
|
||||
};
|
||||
|
||||
// enum HexagonII::CompoundGroup
|
||||
namespace {
|
||||
unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
|
||||
static unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
|
||||
unsigned DstReg, SrcReg, Src1Reg, Src2Reg;
|
||||
|
||||
switch (MI.getOpcode()) {
|
||||
@ -173,11 +170,9 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
|
||||
|
||||
return HexagonII::HCG_None;
|
||||
}
|
||||
}
|
||||
|
||||
/// getCompoundOp - Return the index from 0-7 into the above opcode lists.
|
||||
namespace {
|
||||
unsigned getCompoundOp(MCInst const &HMCI) {
|
||||
static unsigned getCompoundOp(MCInst const &HMCI) {
|
||||
const MCOperand &Predicate = HMCI.getOperand(0);
|
||||
unsigned PredReg = Predicate.getReg();
|
||||
|
||||
@ -198,11 +193,10 @@ unsigned getCompoundOp(MCInst const &HMCI) {
|
||||
return (PredReg == Hexagon::P0) ? tp0_jump_t : tp1_jump_t;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
|
||||
MCInst *CompoundInsn = 0;
|
||||
static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
|
||||
MCInst const &R) {
|
||||
MCInst *CompoundInsn = nullptr;
|
||||
unsigned compoundOpcode;
|
||||
MCOperand Rs, Rt;
|
||||
int64_t Value;
|
||||
@ -336,12 +330,10 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
|
||||
|
||||
return CompoundInsn;
|
||||
}
|
||||
}
|
||||
|
||||
/// Non-Symmetrical. See if these two instructions are fit for compound pair.
|
||||
namespace {
|
||||
bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
|
||||
MCInst const &MIb, bool IsExtendedB) {
|
||||
static bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
|
||||
MCInst const &MIb, bool IsExtendedB) {
|
||||
unsigned MIaG = getCompoundCandidateGroup(MIa, IsExtendedA);
|
||||
unsigned MIbG = getCompoundCandidateGroup(MIb, IsExtendedB);
|
||||
// We have two candidates - check that this is the same register
|
||||
@ -353,10 +345,9 @@ bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
|
||||
return ((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) &&
|
||||
(MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg()));
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
|
||||
static bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context,
|
||||
MCInst &MCI) {
|
||||
assert(HexagonMCInstrInfo::isBundle(MCI));
|
||||
bool JExtended = false;
|
||||
for (MCInst::iterator J =
|
||||
@ -367,8 +358,7 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
|
||||
JExtended = true;
|
||||
continue;
|
||||
}
|
||||
if (llvm::HexagonMCInstrInfo::getType(MCII, *JumpInst) ==
|
||||
HexagonII::TypeJ) {
|
||||
if (HexagonMCInstrInfo::getType(MCII, *JumpInst) == HexagonII::TypeJ) {
|
||||
// Try to pair with another insn (B)undled with jump.
|
||||
bool BExtended = false;
|
||||
for (MCInst::iterator B =
|
||||
@ -401,7 +391,6 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// tryCompound - Given a bundle check for compound insns when one
|
||||
/// is found update the contents fo the bundle with the compound insn.
|
||||
@ -420,6 +409,4 @@ void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII,
|
||||
// a compound is found.
|
||||
while (lookForCompound(MCII, Context, MCI))
|
||||
;
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
//===--- RDFCopy.h --------------------------------------------------------===//
|
||||
//===--- RDFCopy.h ----------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@ -7,23 +7,26 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef RDF_COPY_H
|
||||
#define RDF_COPY_H
|
||||
#ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
|
||||
#define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
|
||||
|
||||
#include "RDFGraph.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class MachineBasicBlock;
|
||||
class MachineDominatorTree;
|
||||
class MachineInstr;
|
||||
|
||||
namespace rdf {
|
||||
|
||||
struct CopyPropagation {
|
||||
CopyPropagation(DataFlowGraph &dfg) : MDT(dfg.getDT()), DFG(dfg),
|
||||
Trace(false) {}
|
||||
virtual ~CopyPropagation() {}
|
||||
|
||||
virtual ~CopyPropagation() = default;
|
||||
|
||||
bool run();
|
||||
void trace(bool On) { Trace = On; }
|
||||
@ -49,7 +52,9 @@ namespace rdf {
|
||||
void updateMap(NodeAddr<InstrNode*> IA);
|
||||
bool scanBlock(MachineBasicBlock *B);
|
||||
};
|
||||
} // namespace rdf
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
||||
} // end namespace rdf
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
|
||||
|
@ -10,16 +10,31 @@
|
||||
// Target-independent, SSA-based data flow graph for register data flow (RDF).
|
||||
//
|
||||
#include "RDFGraph.h"
|
||||
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineDominanceFrontier.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineOperand.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/MC/LaneBitmask.h"
|
||||
#include "llvm/MC/MCInstrDesc.h"
|
||||
#include "llvm/MC/MCRegisterInfo.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/Target/TargetLowering.h"
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <iterator>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
using namespace llvm;
|
||||
using namespace rdf;
|
||||
@ -88,14 +103,12 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) {
|
||||
return OS;
|
||||
}
|
||||
|
||||
namespace {
|
||||
void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA,
|
||||
const DataFlowGraph &G) {
|
||||
OS << Print<NodeId>(RA.Id, G) << '<'
|
||||
<< Print<RegisterRef>(RA.Addr->getRegRef(G), G) << '>';
|
||||
if (RA.Addr->getFlags() & NodeAttrs::Fixed)
|
||||
OS << '!';
|
||||
}
|
||||
static void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA,
|
||||
const DataFlowGraph &G) {
|
||||
OS << Print<NodeId>(RA.Id, G) << '<'
|
||||
<< Print<RegisterRef>(RA.Addr->getRegRef(G), G) << '>';
|
||||
if (RA.Addr->getFlags() & NodeAttrs::Fixed)
|
||||
OS << '!';
|
||||
}
|
||||
|
||||
template<>
|
||||
@ -183,9 +196,11 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeSet> &P) {
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename T>
|
||||
struct PrintListV {
|
||||
PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {}
|
||||
|
||||
typedef T Type;
|
||||
const NodeList &List;
|
||||
const DataFlowGraph &G;
|
||||
@ -201,7 +216,8 @@ namespace {
|
||||
}
|
||||
return OS;
|
||||
}
|
||||
}
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
template<>
|
||||
raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) {
|
||||
@ -219,10 +235,10 @@ raw_ostream &operator<< (raw_ostream &OS,
|
||||
// Print the target for calls and branches (for readability).
|
||||
if (MI.isCall() || MI.isBranch()) {
|
||||
MachineInstr::const_mop_iterator T =
|
||||
find_if(MI.operands(),
|
||||
[] (const MachineOperand &Op) -> bool {
|
||||
return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
|
||||
});
|
||||
llvm::find_if(MI.operands(),
|
||||
[] (const MachineOperand &Op) -> bool {
|
||||
return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
|
||||
});
|
||||
if (T != MI.operands_end()) {
|
||||
OS << ' ';
|
||||
if (T->isMBB())
|
||||
@ -327,8 +343,8 @@ raw_ostream &operator<< (raw_ostream &OS,
|
||||
return OS;
|
||||
}
|
||||
|
||||
} // namespace rdf
|
||||
} // namespace llvm
|
||||
} // end namespace rdf
|
||||
} // end namespace llvm
|
||||
|
||||
// Node allocation functions.
|
||||
//
|
||||
@ -390,7 +406,6 @@ void NodeAllocator::clear() {
|
||||
ActiveEnd = nullptr;
|
||||
}
|
||||
|
||||
|
||||
// Insert node NA after "this" in the circular chain.
|
||||
void NodeBase::append(NodeAddr<NodeBase*> NA) {
|
||||
NodeId Nx = Next;
|
||||
@ -401,7 +416,6 @@ void NodeBase::append(NodeAddr<NodeBase*> NA) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Fundamental node manipulator functions.
|
||||
|
||||
// Obtain the register reference from a reference node.
|
||||
@ -590,7 +604,6 @@ NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) {
|
||||
return findBlock(EntryB, G);
|
||||
}
|
||||
|
||||
|
||||
// Target operand information.
|
||||
//
|
||||
|
||||
@ -641,7 +654,6 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
RegisterRef RegisterAggr::normalize(RegisterRef RR) const {
|
||||
RegisterId SuperReg = RR.Reg;
|
||||
while (true) {
|
||||
@ -745,7 +757,6 @@ void RegisterAggr::print(raw_ostream &OS) const {
|
||||
OS << " }";
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// The data flow graph construction.
|
||||
//
|
||||
@ -753,10 +764,9 @@ void RegisterAggr::print(raw_ostream &OS) const {
|
||||
DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
|
||||
const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
|
||||
const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi)
|
||||
: LMI(), MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) {
|
||||
: MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) {
|
||||
}
|
||||
|
||||
|
||||
// The implementation of the definition stack.
|
||||
// Each register reference has its own definition stack. In particular,
|
||||
// for a register references "Reg" and "Reg:subreg" will each have their
|
||||
@ -845,7 +855,6 @@ unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
|
||||
return P;
|
||||
}
|
||||
|
||||
|
||||
// Register information.
|
||||
|
||||
// Get the list of references aliased to RR. Lane masks are ignored.
|
||||
@ -915,7 +924,6 @@ NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) {
|
||||
return NA;
|
||||
}
|
||||
|
||||
|
||||
// Allocation routines for specific node types/kinds.
|
||||
|
||||
NodeAddr<UseNode*> DataFlowGraph::newUse(NodeAddr<InstrNode*> Owner,
|
||||
@ -1248,7 +1256,6 @@ bool DataFlowGraph::alias(RegisterRef RA, RegisterRef RB) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// Clear all information in the graph.
|
||||
void DataFlowGraph::reset() {
|
||||
Memory.clear();
|
||||
@ -1256,7 +1263,6 @@ void DataFlowGraph::reset() {
|
||||
Func = NodeAddr<FuncNode*>();
|
||||
}
|
||||
|
||||
|
||||
// Return the next reference node in the instruction node IA that is related
|
||||
// to RA. Conceptually, two reference nodes are related if they refer to the
|
||||
// same instance of a register access, but differ in flags or other minor
|
||||
|
@ -1,4 +1,4 @@
|
||||
//===--- RDFGraph.h -------------------------------------------------------===//
|
||||
//===--- RDFGraph.h ---------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@ -221,20 +221,25 @@
|
||||
// The statement s5 has two use nodes for t0: u7" and u9". The quotation
|
||||
// mark " indicates that the node is a shadow.
|
||||
//
|
||||
#ifndef RDF_GRAPH_H
|
||||
#define RDF_GRAPH_H
|
||||
|
||||
#ifndef LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
|
||||
#define LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
|
||||
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/MC/LaneBitmask.h"
|
||||
#include "llvm/Support/Allocator.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Support/Timer.h"
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
// RDF uses uint32_t to refer to registers. This is to ensure that the type
|
||||
@ -243,6 +248,7 @@
|
||||
static_assert(sizeof(uint32_t) == sizeof(unsigned), "Those should be equal");
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class MachineBasicBlock;
|
||||
class MachineFunction;
|
||||
class MachineInstr;
|
||||
@ -252,6 +258,7 @@ namespace llvm {
|
||||
class TargetInstrInfo;
|
||||
|
||||
namespace rdf {
|
||||
|
||||
typedef uint32_t NodeId;
|
||||
typedef uint32_t RegisterId;
|
||||
|
||||
@ -293,9 +300,11 @@ namespace rdf {
|
||||
static uint16_t set_type(uint16_t A, uint16_t T) {
|
||||
return (A & ~TypeMask) | T;
|
||||
}
|
||||
|
||||
static uint16_t set_kind(uint16_t A, uint16_t K) {
|
||||
return (A & ~KindMask) | K;
|
||||
}
|
||||
|
||||
static uint16_t set_flags(uint16_t A, uint16_t F) {
|
||||
return (A & ~FlagMask) | F;
|
||||
}
|
||||
@ -326,9 +335,14 @@ namespace rdf {
|
||||
};
|
||||
|
||||
template <typename T> struct NodeAddr {
|
||||
NodeAddr() : Addr(nullptr), Id(0) {}
|
||||
NodeAddr() : Addr(nullptr) {}
|
||||
NodeAddr(T A, NodeId I) : Addr(A), Id(I) {}
|
||||
|
||||
// Type cast (casting constructor). The reason for having this class
|
||||
// instead of std::pair.
|
||||
template <typename S> NodeAddr(const NodeAddr<S> &NA)
|
||||
: Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
|
||||
|
||||
bool operator== (const NodeAddr<T> &NA) const {
|
||||
assert((Addr == NA.Addr) == (Id == NA.Id));
|
||||
return Addr == NA.Addr;
|
||||
@ -336,13 +350,9 @@ namespace rdf {
|
||||
bool operator!= (const NodeAddr<T> &NA) const {
|
||||
return !operator==(NA);
|
||||
}
|
||||
// Type cast (casting constructor). The reason for having this class
|
||||
// instead of std::pair.
|
||||
template <typename S> NodeAddr(const NodeAddr<S> &NA)
|
||||
: Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
|
||||
|
||||
T Addr;
|
||||
NodeId Id;
|
||||
NodeId Id = 0;
|
||||
};
|
||||
|
||||
struct NodeBase;
|
||||
@ -366,17 +376,20 @@ namespace rdf {
|
||||
struct NodeAllocator {
|
||||
// Amount of storage for a single node.
|
||||
enum { NodeMemSize = 32 };
|
||||
|
||||
NodeAllocator(uint32_t NPB = 4096)
|
||||
: NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)),
|
||||
IndexMask((1 << BitsPerIndex)-1), ActiveEnd(nullptr) {
|
||||
IndexMask((1 << BitsPerIndex)-1) {
|
||||
assert(isPowerOf2_32(NPB));
|
||||
}
|
||||
|
||||
NodeBase *ptr(NodeId N) const {
|
||||
uint32_t N1 = N-1;
|
||||
uint32_t BlockN = N1 >> BitsPerIndex;
|
||||
uint32_t Offset = (N1 & IndexMask) * NodeMemSize;
|
||||
return reinterpret_cast<NodeBase*>(Blocks[BlockN]+Offset);
|
||||
}
|
||||
|
||||
NodeId id(const NodeBase *P) const;
|
||||
NodeAddr<NodeBase*> New();
|
||||
void clear();
|
||||
@ -384,6 +397,7 @@ namespace rdf {
|
||||
private:
|
||||
void startNewBlock();
|
||||
bool needNewBlock();
|
||||
|
||||
uint32_t makeId(uint32_t Block, uint32_t Index) const {
|
||||
// Add 1 to the id, to avoid the id of 0, which is treated as "null".
|
||||
return ((Block << BitsPerIndex) | Index) + 1;
@ -392,7 +406,7 @@ namespace rdf {
const uint32_t NodesPerBlock;
const uint32_t BitsPerIndex;
const uint32_t IndexMask;
char *ActiveEnd;
char *ActiveEnd = nullptr;
std::vector<char*> Blocks;
typedef BumpPtrAllocatorImpl<MallocAllocator, 65536> AllocatorTy;
AllocatorTy MemPool;
@ -405,6 +419,7 @@ namespace rdf {
RegisterRef() : RegisterRef(0) {}
explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll())
: Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {}

operator bool() const { return Reg != 0 && Mask.any(); }
bool operator== (const RegisterRef &RR) const {
return Reg == RR.Reg && Mask == RR.Mask;
@ -420,7 +435,8 @@ namespace rdf {

struct TargetOperandInfo {
TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {}
virtual ~TargetOperandInfo() {}
virtual ~TargetOperandInfo() = default;

virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const;
virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const;
virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const;
@ -428,7 +444,6 @@ namespace rdf {
const TargetInstrInfo &TII;
};

// Packed register reference. Only used for storage.
struct PackedRegisterRef {
RegisterId Reg;
@ -442,11 +457,13 @@ namespace rdf {
template <typename T, unsigned N = 32>
struct IndexedSet {
IndexedSet() : Map() { Map.reserve(N); }

T get(uint32_t Idx) const {
// Index Idx corresponds to Map[Idx-1].
assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size());
return Map[Idx-1];
}

uint32_t insert(T Val) {
// Linear search.
auto F = llvm::find(Map, Val);
@ -455,11 +472,13 @@ namespace rdf {
Map.push_back(Val);
return Map.size(); // Return actual_index + 1.
}

uint32_t find(T Val) const {
auto F = llvm::find(Map, Val);
assert(F != Map.end());
return F - Map.begin();
}

private:
std::vector<T> Map;
};
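
The 1-based indexing convention above (index 0 means "no entry") is easy to get wrong, so here is a minimal re-implementation sketch of the get/insert contract; MiniIndexedSet is a hypothetical stand-in, not the class above:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

template <typename T> struct MiniIndexedSet {
  uint32_t insert(T Val) {
    auto F = std::find(Map.begin(), Map.end(), Val);
    if (F != Map.end())
      return F - Map.begin() + 1; // existing index + 1
    Map.push_back(Val);
    return Map.size();            // new index + 1
  }
  T get(uint32_t Idx) const { return Map[Idx - 1]; }
  std::vector<T> Map;
};

int main() {
  MiniIndexedSet<int> S;
  assert(S.insert(7) == 1 && S.insert(9) == 2 && S.insert(7) == 1);
  assert(S.get(2) == 9);
}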
@ -478,12 +497,14 @@ namespace rdf {
assert(LM.any());
return LM.all() ? 0 : find(LM);
}

PackedRegisterRef pack(RegisterRef RR) {
return { RR.Reg, getIndexForLaneMask(RR.Mask) };
}
PackedRegisterRef pack(RegisterRef RR) const {
return { RR.Reg, getIndexForLaneMask(RR.Mask) };
}

RegisterRef unpack(PackedRegisterRef PR) const {
return RegisterRef(PR.Reg, getLaneMaskForIndex(PR.MaskId));
}
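
A compact sketch of the pack/unpack idea: the all-lanes mask is by far the most common, so it packs to index 0 and only partial masks occupy table slots (simplified stand-ins for LaneBitmask and the IndexedSet, without deduplication):

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::vector<uint64_t> Masks; // IndexedSet<LaneBitmask> stand-in
  auto IndexFor = [&](uint64_t M) -> uint32_t {
    if (M == ~0ULL)
      return 0;                        // "all lanes" needs no table entry
    Masks.push_back(M);
    return (uint32_t)Masks.size();     // 1-based, as in IndexedSet
  };
  uint32_t Id = IndexFor(0x3);         // pack a partial mask
  assert(Id == 1 && Masks[Id - 1] == 0x3); // unpack recovers it
  assert(IndexFor(~0ULL) == 0);        // full mask packs to index 0
}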
@ -491,11 +512,8 @@ namespace rdf {

struct RegisterAggr {
RegisterAggr(const TargetRegisterInfo &tri)
: Masks(), ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false),
TRI(tri) {}
RegisterAggr(const RegisterAggr &RG)
: Masks(RG.Masks), ExpAliasUnits(RG.ExpAliasUnits),
CheckUnits(RG.CheckUnits), TRI(RG.TRI) {}
: ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false), TRI(tri) {}
RegisterAggr(const RegisterAggr &RG) = default;

bool empty() const { return Masks.empty(); }
bool hasAliasOf(RegisterRef RR) const;
@ -530,11 +548,11 @@ namespace rdf {
const TargetRegisterInfo &TRI;
};

struct NodeBase {
public:
// Make sure this is a POD.
NodeBase() = default;

uint16_t getType() const { return NodeAttrs::type(Attrs); }
uint16_t getKind() const { return NodeAttrs::kind(Attrs); }
uint16_t getFlags() const { return NodeAttrs::flags(Attrs); }
@ -596,29 +614,36 @@ namespace rdf {

struct RefNode : public NodeBase {
RefNode() = default;

RegisterRef getRegRef(const DataFlowGraph &G) const;

MachineOperand &getOp() {
assert(!(getFlags() & NodeAttrs::PhiRef));
return *Ref.Op;
}

void setRegRef(RegisterRef RR, DataFlowGraph &G);
void setRegRef(MachineOperand *Op, DataFlowGraph &G);

NodeId getReachingDef() const {
return Ref.RD;
}
void setReachingDef(NodeId RD) {
Ref.RD = RD;
}

NodeId getSibling() const {
return Ref.Sib;
}
void setSibling(NodeId Sib) {
Ref.Sib = Sib;
}

bool isUse() const {
assert(getType() == NodeAttrs::Ref);
return getKind() == NodeAttrs::Use;
}

bool isDef() const {
assert(getType() == NodeAttrs::Ref);
return getKind() == NodeAttrs::Def;
@ -702,6 +727,7 @@ namespace rdf {
MachineBasicBlock *getCode() const {
return CodeNode::getCode<MachineBasicBlock*>();
}

void addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G);
};

@ -709,6 +735,7 @@ namespace rdf {
MachineFunction *getCode() const {
return CodeNode::getCode<MachineFunction*>();
}

NodeAddr<BlockNode*> findBlock(const MachineBasicBlock *BB,
const DataFlowGraph &G) const;
NodeAddr<BlockNode*> getEntryBlock(const DataFlowGraph &G);
@ -723,6 +750,7 @@ namespace rdf {
template <typename T> T ptr(NodeId N) const {
return static_cast<T>(ptr(N));
}

NodeId id(const NodeBase *P) const;

template <typename T> NodeAddr<T> addr(NodeId N) const {
@ -738,13 +766,17 @@ namespace rdf {

struct DefStack {
DefStack() = default;

bool empty() const { return Stack.empty() || top() == bottom(); }

private:
typedef NodeAddr<DefNode*> value_type;
struct Iterator {
typedef DefStack::value_type value_type;

Iterator &up() { Pos = DS.nextUp(Pos); return *this; }
Iterator &down() { Pos = DS.nextDown(Pos); return *this; }

value_type operator*() const {
assert(Pos >= 1);
return DS.Stack[Pos-1];
@ -755,14 +787,17 @@ namespace rdf {
}
bool operator==(const Iterator &It) const { return Pos == It.Pos; }
bool operator!=(const Iterator &It) const { return Pos != It.Pos; }

private:
Iterator(const DefStack &S, bool Top);

// Pos-1 is the index in the StorageType object that corresponds to
// the top of the DefStack.
const DefStack &DS;
unsigned Pos;
friend struct DefStack;
};

public:
typedef Iterator iterator;
iterator top() const { return Iterator(*this, true); }
@ -773,14 +808,18 @@ namespace rdf {
void pop();
void start_block(NodeId N);
void clear_block(NodeId N);

private:
friend struct Iterator;
typedef std::vector<value_type> StorageType;

bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const {
return (P.Addr == nullptr) && (N == 0 || P.Id == N);
}

unsigned nextUp(unsigned P) const;
unsigned nextDown(unsigned P) const;

StorageType Stack;
};

@ -819,6 +858,7 @@ namespace rdf {
if (RemoveFromOwner)
removeFromOwner(UA);
}

void unlinkDef(NodeAddr<DefNode*> DA, bool RemoveFromOwner) {
unlinkDefDF(DA);
if (RemoveFromOwner)
@ -831,23 +871,28 @@ namespace rdf {
return BA.Addr->getType() == NodeAttrs::Ref &&
BA.Addr->getKind() == Kind;
}

template <uint16_t Kind>
static bool IsCode(const NodeAddr<NodeBase*> BA) {
return BA.Addr->getType() == NodeAttrs::Code &&
BA.Addr->getKind() == Kind;
}

static bool IsDef(const NodeAddr<NodeBase*> BA) {
return BA.Addr->getType() == NodeAttrs::Ref &&
BA.Addr->getKind() == NodeAttrs::Def;
}

static bool IsUse(const NodeAddr<NodeBase*> BA) {
return BA.Addr->getType() == NodeAttrs::Ref &&
BA.Addr->getKind() == NodeAttrs::Use;
}

static bool IsPhi(const NodeAddr<NodeBase*> BA) {
return BA.Addr->getType() == NodeAttrs::Code &&
BA.Addr->getKind() == NodeAttrs::Phi;
}

static bool IsPreservingDef(const NodeAddr<DefNode*> DA) {
uint16_t Flags = DA.Addr->getFlags();
return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef);
@ -902,6 +947,7 @@ namespace rdf {

void unlinkUseDF(NodeAddr<UseNode*> UA);
void unlinkDefDF(NodeAddr<DefNode*> DA);

void removeFromOwner(NodeAddr<RefNode*> RA) {
NodeAddr<InstrNode*> IA = RA.Addr->getOwner(*this);
IA.Addr->removeMember(RA, *this);
@ -967,7 +1013,6 @@ namespace rdf {
return MM;
}

// Optionally print the lane mask, if it is not ~0.
struct PrintLaneMaskOpt {
PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {}
@ -991,7 +1036,9 @@ namespace rdf {
PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g)
: Print<NodeAddr<T>>(x, g) {}
};
} // namespace rdf
} // namespace llvm

#endif // RDF_GRAPH_H
} // end namespace rdf

} // end namespace llvm

#endif // LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H

@ -28,6 +28,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@ -43,6 +44,11 @@ bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
return MipsDAGToDAGISel::runOnMachineFunction(MF);
}

void MipsSEDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTreeWrapperPass>();
SelectionDAGISel::getAnalysisUsage(AU);
}

void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
MachineFunction &MF) {
MachineInstrBuilder MIB(MF, &MI);

@ -28,6 +28,8 @@ private:

bool runOnMachineFunction(MachineFunction &MF) override;

void getAnalysisUsage(AnalysisUsage &AU) const override;

void addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
MachineFunction &MF);

@ -3981,40 +3981,46 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
static bool isFunctionGlobalAddress(SDValue Callee);

static bool
resideInSameModule(SDValue Callee, Reloc::Model RelMod) {
resideInSameSection(const Function *Caller, SDValue Callee,
const TargetMachine &TM) {
// If !G, Callee can be an external symbol.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
if (!G) return false;

const GlobalValue *GV = G->getGlobal();

if (GV->isDeclaration()) return false;

switch(GV->getLinkage()) {
default: llvm_unreachable("unknow linkage type");
case GlobalValue::AvailableExternallyLinkage:
case GlobalValue::ExternalWeakLinkage:
if (!G)
return false;

// Callee with weak linkage is allowed if it has hidden or protected
// visibility
case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage: // e.g. c++ template instantiation
if (GV->hasDefaultVisibility())
return false;
const GlobalValue *GV = G->getGlobal();
if (!GV->isStrongDefinitionForLinker())
return false;

case GlobalValue::ExternalLinkage:
case GlobalValue::InternalLinkage:
case GlobalValue::PrivateLinkage:
break;
// Any explicitly-specified sections and section prefixes must also match.
// Also, if we're using -ffunction-sections, then each function is always in
// a different section (the same is true for COMDAT functions).
if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
GV->getSection() != Caller->getSection())
return false;
if (const auto *F = dyn_cast<Function>(GV)) {
if (F->getSectionPrefix() != Caller->getSectionPrefix())
return false;
}

// With '-fPIC', calling default visiblity function need insert 'nop' after
// function call, no matter that function resides in same module or not, so
// we treat it as in different module.
if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility())
// If the callee might be interposed, then we can't assume the ultimate call
// target will be in the same section. Even in cases where we can assume that
// interposition won't happen, in any case where the linker might insert a
// stub to allow for interposition, we must generate code as though
// interposition might occur. To understand why this matters, consider a
// situation where: a -> b -> c where the arrows indicate calls. b and c are
// in the same section, but a is in a different module (i.e. has a different
// TOC base pointer). If the linker allows for interposition between b and c,
// then it will generate a stub for the call edge between b and c which will
// save the TOC pointer into the designated stack slot allocated by b. If we
// return true here, and therefore allow a tail call between b and c, that
// stack slot won't exist and the b -> c stub will end up saving b's TOC base
// pointer into the stack slot allocated by a (where the a -> b stub saved
// a's TOC base pointer). If we're not considering a tail call, but rather,
// whether a nop is needed after the call instruction in b, because the linker
// will insert a stub, it might complain about a missing nop if we omit it
// (although many don't complain in this case).
if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
return false;

return true;
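
Condensing the new logic: the old linkage switch is replaced by two questions, can the definition be interposed at link/load time, and could caller and callee land in different sections. A standalone sketch of that decision with stand-in types (not the LLVM API):

#include <string>

struct Fn {
  bool DSOLocal;       // stands in for TM.shouldAssumeDSOLocal(...)
  bool HasComdat;
  std::string Section; // explicit section name, "" if none
};

// Hypothetical condensed form of resideInSameSection's checks.
bool sameSection(const Fn &Caller, const Fn &Callee, bool FunctionSections) {
  if (!Callee.DSOLocal)
    return false; // might be interposed, so a stub may be inserted
  if (FunctionSections || Caller.HasComdat || Callee.HasComdat)
    return false; // each function ends up in its own section
  return Caller.Section == Callee.Section;
}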
@ -4130,11 +4136,11 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
!isa<ExternalSymbolSDNode>(Callee))
return false;

// Check if Callee resides in the same module, because for now, PPC64 SVR4 ABI
// (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides in another
// module.
// Check if Callee resides in the same section, because for now, PPC64 SVR4
// ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides in
// another section.
// ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel()))
if (!resideInSameSection(MF.getFunction(), Callee, getTargetMachine()))
return false;

// TCO allows altering callee ABI, so we don't have to check further.
@ -4592,14 +4598,6 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
return CallOpc;
}

static
bool isLocalCall(const SDValue &Callee)
{
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
return G->getGlobal()->isStrongDefinitionForLinker();
return false;
}

SDValue PPCTargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@ -4701,6 +4699,7 @@ SDValue PPCTargetLowering::FinishCall(
// stack frame. If caller and callee belong to the same module (and have the
// same TOC), the NOP will remain unchanged.

MachineFunction &MF = DAG.getMachineFunction();
if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
!isPatchPoint) {
if (CallOpc == PPCISD::BCTRL) {
@ -4724,11 +4723,11 @@ SDValue PPCTargetLowering::FinishCall(
// The address needs to go after the chain input but before the flag (or
// any other variadic arguments).
Ops.insert(std::next(Ops.begin()), AddTOC);
} else if ((CallOpc == PPCISD::CALL) &&
(!isLocalCall(Callee) ||
DAG.getTarget().getRelocationModel() == Reloc::PIC_))
} else if (CallOpc == PPCISD::CALL &&
!resideInSameSection(MF.getFunction(), Callee, DAG.getTarget())) {
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
}
}

Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);

@ -70,7 +70,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
EmitFunctionBody();

// Emit the XRay table for this function.
EmitXRayTable();
emitXRayTable();

// We didn't modify anything.
return false;

@ -373,6 +373,10 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr
: std::next(MBBI);
PI = skipDebugInstructionsBackward(PI, MBB.begin());
if (NI != nullptr)
NI = skipDebugInstructionsForward(NI, MBB.end());

unsigned Opc = PI->getOpcode();
int Offset = 0;

@ -2586,6 +2590,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
I = MBB.erase(I);
auto InsertPos = skipDebugInstructionsForward(I, MBB.end());

if (!reserveCallFrame) {
// If the stack pointer can be changed after prologue, turn the
@ -2615,7 +2620,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,

if (HasDwarfEHHandlers && !isDestroy &&
MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
BuildCFI(MBB, I, DL,
BuildCFI(MBB, InsertPos, DL,
MCCFIInstruction::createGnuArgsSize(nullptr, Amount));

if (Amount == 0)
@ -2629,7 +2634,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// If this is a callee-pop calling convention, emit a CFA adjust for
// the amount the callee popped.
if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
BuildCFI(MBB, I, DL,
BuildCFI(MBB, InsertPos, DL,
MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));

// Add Amount to SP to destroy a frame, or subtract to setup.
@ -2640,13 +2645,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// Merge with any previous or following adjustment instruction. Note: the
// instructions merged with here do not have CFI, so their stack
// adjustments do not feed into CfaAdjustment.
StackAdjustment += mergeSPUpdates(MBB, I, true);
StackAdjustment += mergeSPUpdates(MBB, I, false);
StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);

if (StackAdjustment) {
if (!(Fn->optForMinSize() &&
adjustStackWithPops(MBB, I, DL, StackAdjustment)))
BuildStackAdjustment(MBB, I, DL, StackAdjustment,
adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
/*InEpilogue=*/false);
}
}
@ -2662,8 +2667,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// TODO: When not using precise CFA, we also need to adjust for the
// InternalAmt here.
if (CfaAdjustment) {
BuildCFI(MBB, I, DL, MCCFIInstruction::createAdjustCfaOffset(
nullptr, CfaAdjustment));
BuildCFI(MBB, InsertPos, DL,
MCCFIInstruction::createAdjustCfaOffset(nullptr,
CfaAdjustment));
}
}

@ -11474,6 +11474,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
const SmallBitVector &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SmallVector<int, 4> WidenedMask;
if (!canWidenShuffleElements(Mask, WidenedMask))
return SDValue();

// TODO: If minimizing size and one of the inputs is a zero vector and the
// zero vector has only one use, we could use a VPERM2X128 to save the
// instruction bytes needed to explicitly generate the zero vector.
@ -11521,15 +11525,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
// [6] - ignore
// [7] - zero high half of destination

int MaskLO = Mask[0];
if (MaskLO == SM_SentinelUndef)
MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1];
int MaskLO = WidenedMask[0] < 0 ? 0 : WidenedMask[0];
int MaskHI = WidenedMask[1] < 0 ? 0 : WidenedMask[1];

int MaskHI = Mask[2];
if (MaskHI == SM_SentinelUndef)
MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3];

unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4;
unsigned PermMask = MaskLO | (MaskHI << 4);

// If either input is a zero vector, replace it with an undef input.
// Shuffle mask values < 4 are selecting elements of V1.
@ -11538,16 +11537,16 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
// selecting the zero vector and setting the zero mask bit.
if (IsV1Zero) {
V1 = DAG.getUNDEF(VT);
if (MaskLO < 4)
if (MaskLO < 2)
PermMask = (PermMask & 0xf0) | 0x08;
if (MaskHI < 4)
if (MaskHI < 2)
PermMask = (PermMask & 0x0f) | 0x80;
}
if (IsV2Zero) {
V2 = DAG.getUNDEF(VT);
if (MaskLO >= 4)
if (MaskLO >= 2)
PermMask = (PermMask & 0xf0) | 0x08;
if (MaskHI >= 4)
if (MaskHI >= 2)
PermMask = (PermMask & 0x0f) | 0x80;
}
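
A worked example of the immediate computed above, using the widened 128-bit selectors directly (0..3 = V1 low, V1 high, V2 low, V2 high), as a standalone sanity check:

#include <cassert>

int main() {
  // Hypothetical widened mask: take V2's low lane, then V1's high lane.
  int MaskLO = 2, MaskHI = 1;
  unsigned PermMask = MaskLO | (MaskHI << 4);
  assert(PermMask == 0x12);
  // If V1 were all-zero, any selector < 2 (i.e. drawn from V1) would be
  // replaced with a zeroing bit: (PermMask & 0xf0) | 0x08 for the low half.
}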
@ -12012,11 +12011,9 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");

SmallVector<int, 4> WidenedMask;
if (canWidenShuffleElements(Mask, WidenedMask))
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return V;
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return V;

if (V2.isUndef()) {
// Check for being able to broadcast a single element.
@ -12107,11 +12104,9 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!");

SmallVector<int, 4> WidenedMask;
if (canWidenShuffleElements(Mask, WidenedMask))
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return V;
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return V;

if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
@ -12605,33 +12600,72 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
if (!canWidenShuffleElements(Mask, WidenedMask))
return SDValue();

SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
// Ensure elements came from the same Op.
int MaxOp1Index = VT.getVectorNumElements()/2 - 1;
for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
if (WidenedMask[i] == SM_SentinelZero)
return SDValue();
if (WidenedMask[i] == SM_SentinelUndef)
// Check for patterns which can be matched with a single insert of a 256-bit
// subvector.
bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask,
{0, 1, 2, 3, 0, 1, 2, 3});
if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask,
{0, 1, 2, 3, 8, 9, 10, 11})) {
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4);
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
DAG.getIntPtrConstant(0, DL));
SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
OnlyUsesV1 ? V1 : V2,
DAG.getIntPtrConstant(0, DL));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
}

assert(WidenedMask.size() == 4);

// See if this is an insertion of the lower 128-bits of V2 into V1.
bool IsInsert = true;
int V2Index = -1;
for (int i = 0; i < 4; ++i) {
assert(WidenedMask[i] >= -1);
if (WidenedMask[i] < 0)
continue;

SDValue Op = WidenedMask[i] > MaxOp1Index ? V2 : V1;
unsigned OpIndex = (i < Size/2) ? 0 : 1;
// Make sure all V1 subvectors are in place.
if (WidenedMask[i] < 4) {
if (WidenedMask[i] != i) {
IsInsert = false;
break;
}
} else {
// Make sure we only have a single V2 index and it's the lowest 128-bits.
if (V2Index >= 0 || WidenedMask[i] != 4) {
IsInsert = false;
break;
}
V2Index = i;
}
}
if (IsInsert && V2Index >= 0) {
MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
DAG.getIntPtrConstant(0, DL));
return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL);
}

// Try to lower to vshuf64x2/vshuf32x4.
SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
unsigned PermMask = 0;
// Ensure elements came from the same Op.
for (int i = 0; i < 4; ++i) {
assert(WidenedMask[i] >= -1);
if (WidenedMask[i] < 0)
continue;

SDValue Op = WidenedMask[i] >= 4 ? V2 : V1;
unsigned OpIndex = i / 2;
if (Ops[OpIndex].isUndef())
Ops[OpIndex] = Op;
else if (Ops[OpIndex] != Op)
return SDValue();
}

// Form a 128-bit permutation.
// Convert the 64-bit shuffle mask selection values into 128-bit selection
// bits defined by a vshuf64x2 instruction's immediate control byte.
unsigned PermMask = 0, Imm = 0;
unsigned ControlBitsNum = WidenedMask.size() / 2;

for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
// Use first element in place of undef mask.
Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
PermMask |= (Imm % WidenedMask.size()) << (i * ControlBitsNum);
// Convert the 128-bit shuffle mask selection values into 128-bit selection
// bits defined by a vshuf64x2 instruction's immediate control byte.
PermMask |= (WidenedMask[i] % 4) << (i * 2);
}
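
A worked example of the vshuf64x2/vshuf32x4 immediate built above: four widened selectors, two bits each, reduced modulo 4 since each source operand contributes four 128-bit lanes (the mask values here are hypothetical):

#include <cassert>

int main() {
  int WidenedMask[4] = {1, 0, 6, 4}; // lanes from V1, V1, V2, V2
  unsigned PermMask = 0;
  for (int i = 0; i < 4; ++i)
    PermMask |= (WidenedMask[i] % 4) << (i * 2);
  assert(PermMask == 0x21); // 0b00'10'00'01, read two bits per lane
}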

return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
@ -13051,10 +13085,10 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) {
int NumElements = Mask.size();

int NumV1Elements = 0, NumV2Elements = 0, NumSentinelElements = 0;
int NumV1Elements = 0, NumV2Elements = 0;
for (int M : Mask)
if (M < 0)
++NumSentinelElements;
continue;
else if (M < NumElements)
++NumV1Elements;
else
@ -18660,8 +18694,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_3OP_IMM8_MASK:
case INTR_TYPE_3OP_MASK:
case INSERT_SUBVEC: {
case INTR_TYPE_3OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
@ -18670,13 +18703,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget

if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK)
Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
else if (IntrData->Type == INSERT_SUBVEC) {
// imm should be adapted to ISD::INSERT_SUBVECTOR behavior
assert(isa<ConstantSDNode>(Src3) && "Expected a ConstantSDNode here!");
unsigned Imm = cast<ConstantSDNode>(Src3)->getZExtValue();
Imm *= Src2.getSimpleValueType().getVectorNumElements();
Src3 = DAG.getTargetConstant(Imm, dl, MVT::i32);
}

// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
@ -28693,6 +28719,29 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1),
Op.getOperand(2));
}
case ISD::INSERT_SUBVECTOR: {
unsigned EltSize = EltVT.getSizeInBits();
if (EltSize != 32 && EltSize != 64)
return false;
MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
// Only change element size, not type.
if (VT.isInteger() != OpEltVT.isInteger())
return false;
uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
SDValue Op0 = DAG.getBitcast(VT, Op.getOperand(0));
DCI.AddToWorklist(Op0.getNode());
// Op1 needs to be bitcasted to a smaller vector with the same element type.
SDValue Op1 = Op.getOperand(1);
MVT Op1VT = MVT::getVectorVT(EltVT,
Op1.getSimpleValueType().getSizeInBits() / EltSize);
Op1 = DAG.getBitcast(Op1VT, Op1);
DCI.AddToWorklist(Op1.getNode());
DCI.CombineTo(OrigOp.getNode(),
DAG.getNode(Opcode, DL, VT, Op0, Op1,
DAG.getConstant(Imm, DL, MVT::i8)));
return true;
}
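
A worked example of the index rescaling above: the insert position is an element index, so when a bitcast doubles the element width the index halves, keeping the byte offset fixed:

#include <cassert>

int main() {
  unsigned Imm = 4;                    // index in 32-bit element units
  unsigned OpEltBits = 32, EltBits = 64;
  unsigned NewImm = (Imm * OpEltBits) / EltBits;
  assert(NewImm == 2);                 // same byte offset: 4*4 == 2*8 bytes
}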
}

return false;
@ -31784,6 +31833,83 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
return SDValue();
}

/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
/// the codegen.
/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SDLoc &DL) {
assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode");
SDValue Src = N->getOperand(0);
unsigned Opcode = Src.getOpcode();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();

EVT VT = N->getValueType(0);
EVT SrcVT = Src.getValueType();

auto IsRepeatedOpOrOneUseConstant = [](SDValue Op0, SDValue Op1) {
// TODO: Add extra cases where we can truncate both inputs for the
// cost of one (or none).
// e.g. TRUNC( BINOP( EXT( X ), EXT( Y ) ) ) --> BINOP( X, Y )
if (Op0 == Op1)
return true;

SDValue BC0 = peekThroughOneUseBitcasts(Op0);
SDValue BC1 = peekThroughOneUseBitcasts(Op1);
return ISD::isBuildVectorOfConstantSDNodes(BC0.getNode()) ||
ISD::isBuildVectorOfConstantSDNodes(BC1.getNode());
};

auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
return DAG.getNode(Opcode, DL, VT, Trunc0, Trunc1);
};

// Don't combine if the operation has other uses.
if (!N->isOnlyUserOf(Src.getNode()))
return SDValue();

// Only support vector truncation for now.
// TODO: i64 scalar math would benefit as well.
if (!VT.isVector())
return SDValue();

// In most cases it's only worth pre-truncating if we're only facing the cost
// of one truncation.
// i.e. if one of the inputs will constant fold or the input is repeated.
switch (Opcode) {
case ISD::AND:
case ISD::XOR:
case ISD::OR: {
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegalOrPromote(Opcode, VT) &&
IsRepeatedOpOrOneUseConstant(Op0, Op1))
return TruncateArithmetic(Op0, Op1);
break;
}

case ISD::MUL:
// X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - it's
// better to truncate if we have the chance.
if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) &&
!TLI.isOperationLegal(Opcode, SrcVT))
return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
LLVM_FALLTHROUGH;
case ISD::ADD: {
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegal(Opcode, VT) &&
IsRepeatedOpOrOneUseConstant(Op0, Op1))
return TruncateArithmetic(Op0, Op1);
break;
}
}

return SDValue();
}
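
The combine is sound because truncation commutes with two's-complement add/mul and with bitwise ops: the low bits of the result never depend on the discarded high bits. A scalar sanity check of that identity (illustration only; the real transform operates on vector SDNodes):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0x1234567890ABCDEFULL, Y = 0x0FEDCBA098765432ULL;
  // Truncate-then-op equals op-then-truncate for wrap-around arithmetic.
  assert((uint32_t)(X + Y) == (uint32_t)((uint32_t)X + (uint32_t)Y));
  assert((uint32_t)(X * Y) == (uint32_t)((uint32_t)X * (uint32_t)Y));
  assert((uint32_t)(X ^ Y) == ((uint32_t)X ^ (uint32_t)Y));
}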

/// Truncate a group of v4i32 into v16i8/v8i16 using X86ISD::PACKUS.
static SDValue
combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG,
@ -31970,6 +32096,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
SDValue Src = N->getOperand(0);
SDLoc DL(N);

// Attempt to pre-truncate inputs to arithmetic ops instead.
if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL))
return V;

// Try to detect AVG pattern first.
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;

@ -650,33 +650,6 @@ multiclass vextract_for_size<int Opcode,
From.ZSuffix # "rrkz")
To.KRCWM:$mask, From.RC:$src1,
(EXTRACT_get_vextract_imm To.RC:$ext))>;

// Intrinsic call with masking.
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
"x" # To.NumElts # "_" # From.Size)
From.RC:$src1, (iPTR imm:$idx), To.RC:$src0, To.MRC:$mask),
(!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
From.ZSuffix # "rrk")
To.RC:$src0,
(COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
From.RC:$src1, imm:$idx)>;

// Intrinsic call with zero-masking.
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
"x" # To.NumElts # "_" # From.Size)
From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, To.MRC:$mask),
(!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
From.ZSuffix # "rrkz")
(COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
From.RC:$src1, imm:$idx)>;

// Intrinsic call without masking.
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
"x" # To.NumElts # "_" # From.Size)
From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
(!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
From.ZSuffix # "rr")
From.RC:$src1, imm:$idx)>;
}

// Codegen pattern for the alternative types
@ -6871,18 +6844,18 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let isCodeGenOnly = 1 in {
defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
load, "ucomiss">, PS, EVEX, VEX_LIG,
defm Int_VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
sse_load_f32, "ucomiss">, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
load, "ucomisd">, PD, EVEX,
defm Int_VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
sse_load_f64, "ucomisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
load, "comiss">, PS, EVEX, VEX_LIG,
defm Int_VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
sse_load_f32, "comiss">, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
load, "comisd">, PD, EVEX,
defm Int_VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
sse_load_f64, "comisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
}

@ -2373,6 +2373,23 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
Sched<[WriteFAddLd, ReadAfterLd]>;
}

// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp
multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
ValueType vt, Operand memop,
ComplexPattern mem_cpat, string OpcodeStr> {
def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
IIC_SSE_COMIS_RR>,
Sched<[WriteFAdd]>;
def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
mem_cpat:$src2))],
IIC_SSE_COMIS_RM>,
Sched<[WriteFAddLd, ReadAfterLd]>;
}

let Defs = [EFLAGS] in {
defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss">, PS, VEX, VEX_LIG;
@ -2386,15 +2403,15 @@ let Defs = [EFLAGS] in {
}

let isCodeGenOnly = 1 in {
defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
load, "ucomiss">, PS, VEX;
defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
load, "ucomisd">, PD, VEX;
defm Int_VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
sse_load_f32, "ucomiss">, PS, VEX;
defm Int_VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
sse_load_f64, "ucomisd">, PD, VEX;

defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
load, "comiss">, PS, VEX;
defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
load, "comisd">, PD, VEX;
defm Int_VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
sse_load_f32, "comiss">, PS, VEX;
defm Int_VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
sse_load_f64, "comisd">, PD, VEX;
}
defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss">, PS;
@ -2409,15 +2426,15 @@ let Defs = [EFLAGS] in {
}

let isCodeGenOnly = 1 in {
defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
load, "ucomiss">, PS;
defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
load, "ucomisd">, PD;
defm Int_UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
sse_load_f32, "ucomiss">, PS;
defm Int_UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
sse_load_f64, "ucomisd">, PD;

defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
"comiss">, PS;
defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
"comisd">, PD;
defm Int_COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
sse_load_f32, "comiss">, PS;
defm Int_COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
sse_load_f64, "comisd">, PD;
}
} // Defs = [EFLAGS]

|
||||
//===-- X86AVX512Info.h - X86 Instruction Tables Information ----*- C++ -*-===//
|
||||
//===-- X86InstrTablesInfo.h - X86 Instruction Tables -----------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@ -25,8 +25,7 @@ struct X86EvexToVexCompressTableEntry {
|
||||
|
||||
// X86 EVEX encoded instructions that have a VEX 128 encoding
|
||||
// (table format: <EVEX opcode, VEX-128 opcode>).
|
||||
static const X86EvexToVexCompressTableEntry
|
||||
X86EvexToVex128CompressTable[] = {
|
||||
static const X86EvexToVexCompressTableEntry X86EvexToVex128CompressTable[] = {
|
||||
// EVEX scalar with corresponding VEX.
|
||||
{ X86::Int_VCOMISDZrm , X86::Int_VCOMISDrm },
|
||||
{ X86::Int_VCOMISDZrr , X86::Int_VCOMISDrr },
|
||||
@ -250,20 +249,20 @@ static const X86EvexToVexCompressTableEntry
|
||||
{ X86::VUCOMISDZrr , X86::VUCOMISDrr },
|
||||
{ X86::VUCOMISSZrm , X86::VUCOMISSrm },
|
||||
{ X86::VUCOMISSZrr , X86::VUCOMISSrr },
|
||||
|
||||
|
||||
{ X86::VMOV64toPQIZrr , X86::VMOV64toPQIrr },
|
||||
{ X86::VMOV64toSDZrr , X86::VMOV64toSDrr },
|
||||
{ X86::VMOVDI2PDIZrm , X86::VMOVDI2PDIrm },
|
||||
{ X86::VMOVDI2PDIZrr , X86::VMOVDI2PDIrr },
|
||||
{ X86::VMOVLHPSZrr , X86::VMOVLHPSrr },
|
||||
{ X86::VMOVHLPSZrr , X86::VMOVHLPSrr },
|
||||
{ X86::VMOVHLPSZrr , X86::VMOVHLPSrr },
|
||||
{ X86::VMOVPDI2DIZmr , X86::VMOVPDI2DImr },
|
||||
{ X86::VMOVPDI2DIZrr , X86::VMOVPDI2DIrr },
|
||||
{ X86::VMOVPQI2QIZmr , X86::VMOVPQI2QImr },
|
||||
{ X86::VMOVPQIto64Zrr , X86::VMOVPQIto64rr },
|
||||
{ X86::VMOVQI2PQIZrm , X86::VMOVQI2PQIrm },
|
||||
{ X86::VMOVZPQILo2PQIZrr , X86::VMOVZPQILo2PQIrr },
|
||||
|
||||
|
||||
{ X86::VPEXTRBZmr , X86::VPEXTRBmr },
|
||||
{ X86::VPEXTRBZrr , X86::VPEXTRBrr },
|
||||
{ X86::VPEXTRDZmr , X86::VPEXTRDmr },
|
||||
@ -272,7 +271,7 @@ static const X86EvexToVexCompressTableEntry
|
||||
{ X86::VPEXTRQZrr , X86::VPEXTRQrr },
|
||||
{ X86::VPEXTRWZmr , X86::VPEXTRWmr },
|
||||
{ X86::VPEXTRWZrr , X86::VPEXTRWri },
|
||||
|
||||
|
||||
{ X86::VPINSRBZrm , X86::VPINSRBrm },
|
||||
{ X86::VPINSRBZrr , X86::VPINSRBrr },
|
||||
{ X86::VPINSRDZrm , X86::VPINSRDrm },
|
||||
@ -294,7 +293,7 @@ static const X86EvexToVexCompressTableEntry
|
||||
{ X86::VANDPDZ128rm , X86::VANDPDrm },
|
||||
{ X86::VANDPDZ128rr , X86::VANDPDrr },
|
||||
{ X86::VANDPSZ128rm , X86::VANDPSrm },
|
||||
{ X86::VANDPSZ128rr , X86::VANDPSrr },
|
||||
{ X86::VANDPSZ128rr , X86::VANDPSrr },
|
||||
{ X86::VBROADCASTSSZ128m , X86::VBROADCASTSSrm },
|
||||
{ X86::VBROADCASTSSZ128r , X86::VBROADCASTSSrr },
|
||||
{ X86::VBROADCASTSSZ128r_s , X86::VBROADCASTSSrr },
|
||||
@ -414,8 +413,8 @@ static const X86EvexToVexCompressTableEntry
|
||||
{ X86::VMOVAPDZ128rm , X86::VMOVAPDrm },
|
||||
{ X86::VMOVAPDZ128rr , X86::VMOVAPDrr },
|
||||
{ X86::VMOVAPDZ128rr_REV , X86::VMOVAPDrr_REV },
|
||||
{ X86::VMOVAPSZ128mr , X86::VMOVAPSmr },
|
||||
{ X86::VMOVAPSZ128rm , X86::VMOVAPSrm },
|
||||
{ X86::VMOVAPSZ128mr , X86::VMOVAPSmr },
|
||||
{ X86::VMOVAPSZ128rm , X86::VMOVAPSrm },
|
||||
{ X86::VMOVAPSZ128rr , X86::VMOVAPSrr },
|
||||
{ X86::VMOVAPSZ128rr_REV , X86::VMOVAPSrr_REV },
|
||||
{ X86::VMOVDDUPZ128rm , X86::VMOVDDUPrm },
|
||||
@ -464,8 +463,8 @@ static const X86EvexToVexCompressTableEntry
|
||||
{ X86::VMOVUPDZ128rm , X86::VMOVUPDrm },
|
||||
{ X86::VMOVUPDZ128rr , X86::VMOVUPDrr },
|
||||
{ X86::VMOVUPDZ128rr_REV , X86::VMOVUPDrr_REV },
|
||||
{ X86::VMOVUPSZ128mr , X86::VMOVUPSmr },
|
||||
{ X86::VMOVUPSZ128rm , X86::VMOVUPSrm },
|
||||
{ X86::VMOVUPSZ128mr , X86::VMOVUPSmr },
|
||||
{ X86::VMOVUPSZ128rm , X86::VMOVUPSrm },
|
||||
{ X86::VMOVUPSZ128rr , X86::VMOVUPSrr },
|
||||
{ X86::VMOVUPSZ128rr_REV , X86::VMOVUPSrr_REV },
|
||||
{ X86::VMULPDZ128rm , X86::VMULPDrm },
|
||||
@ -520,9 +519,9 @@ static const X86EvexToVexCompressTableEntry
|
||||
{ X86::VPBROADCASTBZ128r , X86::VPBROADCASTBrr },
|
||||
{ X86::VPBROADCASTDZ128m , X86::VPBROADCASTDrm },
|
||||
{ X86::VPBROADCASTDZ128r , X86::VPBROADCASTDrr },
|
||||
{ X86::VPBROADCASTQZ128m , X86::VPBROADCASTQrm },
|
||||
{ X86::VPBROADCASTQZ128r , X86::VPBROADCASTQrr },
|
||||
{ X86::VPBROADCASTWZ128m , X86::VPBROADCASTWrm },
|
||||
{ X86::VPBROADCASTQZ128m , X86::VPBROADCASTQrm },
|
||||
{ X86::VPBROADCASTQZ128r , X86::VPBROADCASTQrr },
|
||||
{ X86::VPBROADCASTWZ128m , X86::VPBROADCASTWrm },
|
||||
{ X86::VPBROADCASTWZ128r , X86::VPBROADCASTWrr },
|
||||
{ X86::VPERMILPDZ128mi , X86::VPERMILPDmi },
|
||||
{ X86::VPERMILPDZ128ri , X86::VPERMILPDri },
|
||||
@ -583,7 +582,7 @@ static const X86EvexToVexCompressTableEntry
|
||||
{ X86::VPMOVZXWDZ128rm , X86::VPMOVZXWDrm },
|
||||
{ X86::VPMOVZXWDZ128rr , X86::VPMOVZXWDrr },
|
||||
{ X86::VPMOVZXWQZ128rm , X86::VPMOVZXWQrm },
|
||||
{ X86::VPMOVZXWQZ128rr , X86::VPMOVZXWQrr },
|
||||
{ X86::VPMOVZXWQZ128rr , X86::VPMOVZXWQrr },
|
||||
{ X86::VPMULDQZ128rm , X86::VPMULDQrm },
|
||||
{ X86::VPMULDQZ128rr , X86::VPMULDQrr },
|
||||
{ X86::VPMULHRSWZ128rm , X86::VPMULHRSWrm },
|
||||
@ -612,10 +611,10 @@ static const X86EvexToVexCompressTableEntry
|
||||
{ X86::VPSHUFHWZ128ri , X86::VPSHUFHWri },
|
||||
{ X86::VPSHUFLWZ128mi , X86::VPSHUFLWmi },
|
||||
{ X86::VPSHUFLWZ128ri , X86::VPSHUFLWri },
|
||||
{ X86::VPSLLDQZ128rr , X86::VPSLLDQri },
|
||||
{ X86::VPSLLDQZ128rr , X86::VPSLLDQri },
|
||||
{ X86::VPSLLDZ128ri , X86::VPSLLDri },
|
||||
{ X86::VPSLLDZ128rm , X86::VPSLLDrm },
|
||||
{ X86::VPSLLDZ128rr , X86::VPSLLDrr },
|
||||
{ X86::VPSLLDZ128rr , X86::VPSLLDrr },
|
||||
{ X86::VPSLLQZ128ri , X86::VPSLLQri },
|
||||
{ X86::VPSLLQZ128rm , X86::VPSLLQrm },
|
||||
{ X86::VPSLLQZ128rr , X86::VPSLLQrr },
|
||||
@ -713,8 +712,7 @@ static const X86EvexToVexCompressTableEntry
|
||||
|
||||
// X86 EVEX encoded instructions that have a VEX 256 encoding
|
||||
// (table format: <EVEX opcode, VEX-256 opcode>).
|
||||
static const X86EvexToVexCompressTableEntry
|
||||
X86EvexToVex256CompressTable[] = {
|
||||
static const X86EvexToVexCompressTableEntry X86EvexToVex256CompressTable[] = {
|
||||
{ X86::VADDPDZ256rm , X86::VADDPDYrm },
|
||||
{ X86::VADDPDZ256rr , X86::VADDPDYrr },
|
||||
{ X86::VADDPSZ256rm , X86::VADDPSYrm },
|
||||
@ -727,11 +725,11 @@ static const X86EvexToVexCompressTableEntry
|
||||
{ X86::VANDPDZ256rr , X86::VANDPDYrr },
|
||||
{ X86::VANDPSZ256rm , X86::VANDPSYrm },
|
||||
{ X86::VANDPSZ256rr , X86::VANDPSYrr },
|
||||
{ X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm },
|
||||
{ X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr },
|
||||
{ X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr },
|
||||
{ X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm },
|
||||
{ X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr },
|
||||
{ X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr },
|
||||
{ X86::VBROADCASTSSZ256m , X86::VBROADCASTSSYrm },
|
||||
{ X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr },
|
||||
{ X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr },
|
||||
{ X86::VBROADCASTSSZ256r_s , X86::VBROADCASTSSYrr },
|
||||
{ X86::VCVTDQ2PDZ256rm , X86::VCVTDQ2PDYrm },
|
||||
{ X86::VCVTDQ2PDZ256rr , X86::VCVTDQ2PDYrr },
|
||||
@ -757,6 +755,14 @@ static const X86EvexToVexCompressTableEntry
|
||||
{ X86::VDIVPDZ256rr , X86::VDIVPDYrr },
|
||||
{ X86::VDIVPSZ256rm , X86::VDIVPSYrm },
|
||||
{ X86::VDIVPSZ256rr , X86::VDIVPSYrr },
|
||||
{ X86::VEXTRACTF32x4Z256mr , X86::VEXTRACTF128mr },
|
||||
{ X86::VEXTRACTF64x2Z256mr , X86::VEXTRACTF128mr },
|
||||
{ X86::VEXTRACTF32x4Z256rr , X86::VEXTRACTF128rr },
|
||||
{ X86::VEXTRACTF64x2Z256rr , X86::VEXTRACTF128rr },
|
||||
{ X86::VEXTRACTI32x4Z256mr , X86::VEXTRACTI128mr },
|
||||
{ X86::VEXTRACTI64x2Z256mr , X86::VEXTRACTI128mr },
|
||||
{ X86::VEXTRACTI32x4Z256rr , X86::VEXTRACTI128rr },
|
||||
{ X86::VEXTRACTI64x2Z256rr , X86::VEXTRACTI128rr },
|
||||
{ X86::VFMADD132PDZ256m , X86::VFMADD132PDYm },
|
||||
{ X86::VFMADD132PDZ256r , X86::VFMADD132PDYr },
|
||||
{ X86::VFMADD132PSZ256m , X86::VFMADD132PSYm },
|
||||
@ -829,6 +835,14 @@ static const X86EvexToVexCompressTableEntry
|
||||
{ X86::VFNMSUB231PDZ256r , X86::VFNMSUB231PDYr },
|
||||
{ X86::VFNMSUB231PSZ256m , X86::VFNMSUB231PSYm },
|
||||
{ X86::VFNMSUB231PSZ256r , X86::VFNMSUB231PSYr },
|
||||
{ X86::VINSERTF32x4Z256rm , X86::VINSERTF128rm },
|
||||
{ X86::VINSERTF64x2Z256rm , X86::VINSERTF128rm },
|
||||
{ X86::VINSERTF32x4Z256rr , X86::VINSERTF128rr },
|
||||
{ X86::VINSERTF64x2Z256rr , X86::VINSERTF128rr },
|
||||
{ X86::VINSERTI32x4Z256rm , X86::VINSERTI128rm },
|
||||
{ X86::VINSERTI64x2Z256rm , X86::VINSERTI128rm },
|
||||
{ X86::VINSERTI32x4Z256rr , X86::VINSERTI128rr },
|
||||
{ X86::VINSERTI64x2Z256rr , X86::VINSERTI128rr },
|
||||
{ X86::VMAXCPDZ256rm , X86::VMAXCPDYrm },
|
||||
{ X86::VMAXCPDZ256rr , X86::VMAXCPDYrr },
|
||||
{ X86::VMAXCPSZ256rm , X86::VMAXCPSYrm },
|
||||
@ -849,8 +863,8 @@ static const X86EvexToVexCompressTableEntry
|
||||
{ X86::VMOVAPDZ256rm , X86::VMOVAPDYrm },
|
||||
{ X86::VMOVAPDZ256rr , X86::VMOVAPDYrr },
|
||||
{ X86::VMOVAPDZ256rr_REV , X86::VMOVAPDYrr_REV },
|
||||
{ X86::VMOVAPSZ256mr , X86::VMOVAPSYmr },
|
||||
{ X86::VMOVAPSZ256rm , X86::VMOVAPSYrm },
|
||||
{ X86::VMOVAPSZ256mr , X86::VMOVAPSYmr },
|
||||
{ X86::VMOVAPSZ256rm , X86::VMOVAPSYrm },
|
||||
{ X86::VMOVAPSZ256rr , X86::VMOVAPSYrr },
|
||||
{ X86::VMOVAPSZ256rr_REV , X86::VMOVAPSYrr_REV },
|
||||
{ X86::VMOVDDUPZ256rm , X86::VMOVDDUPYrm },
|
||||
@ -943,14 +957,14 @@ static const X86EvexToVexCompressTableEntry
|
||||
{ X86::VPAVGBZ256rr , X86::VPAVGBYrr },
|
||||
{ X86::VPAVGWZ256rm , X86::VPAVGWYrm },
|
||||
{ X86::VPAVGWZ256rr , X86::VPAVGWYrr },
|
||||
{ X86::VPBROADCASTBZ256m , X86::VPBROADCASTBYrm },
|
||||
{ X86::VPBROADCASTBZ256r , X86::VPBROADCASTBYrr },
|
||||
{ X86::VPBROADCASTDZ256m , X86::VPBROADCASTDYrm },
|
||||
{ X86::VPBROADCASTDZ256r , X86::VPBROADCASTDYrr },
|
||||
{ X86::VPBROADCASTQZ256m , X86::VPBROADCASTQYrm },
|
||||
{ X86::VPBROADCASTQZ256r , X86::VPBROADCASTQYrr },
|
||||
{ X86::VPBROADCASTWZ256m , X86::VPBROADCASTWYrm },
|
||||
{ X86::VPBROADCASTWZ256r , X86::VPBROADCASTWYrr },
|
||||
{ X86::VPBROADCASTBZ256m , X86::VPBROADCASTBYrm },
|
||||
{ X86::VPBROADCASTBZ256r , X86::VPBROADCASTBYrr },
|
||||
{ X86::VPBROADCASTDZ256m , X86::VPBROADCASTDYrm },
|
||||
{ X86::VPBROADCASTDZ256r , X86::VPBROADCASTDYrr },
|
||||
{ X86::VPBROADCASTQZ256m , X86::VPBROADCASTQYrm },
|
||||
{ X86::VPBROADCASTQZ256r , X86::VPBROADCASTQYrr },
|
||||
{ X86::VPBROADCASTWZ256m , X86::VPBROADCASTWYrm },
|
||||
{ X86::VPBROADCASTWZ256r , X86::VPBROADCASTWYrr },
|
||||
{ X86::VPERMDZ256rm , X86::VPERMDYrm },
|
||||
{ X86::VPERMDZ256rr , X86::VPERMDYrr },
|
||||
{ X86::VPERMILPDZ256mi , X86::VPERMILPDYmi },
|
||||
@ -1050,7 +1064,7 @@ static const X86EvexToVexCompressTableEntry
|
||||
{ X86::VPSLLDQZ256rr , X86::VPSLLDQYri },
|
||||
{ X86::VPSLLDZ256ri , X86::VPSLLDYri },
|
||||
{ X86::VPSLLDZ256rm , X86::VPSLLDYrm },
|
||||
{ X86::VPSLLDZ256rr , X86::VPSLLDYrr },
|
||||
{ X86::VPSLLDZ256rr , X86::VPSLLDYrr },
|
||||
{ X86::VPSLLQZ256ri , X86::VPSLLQYri },
|
||||
{ X86::VPSLLQZ256rm , X86::VPSLLQYrm },
|
||||
{ X86::VPSLLQZ256rr , X86::VPSLLQYrr },
|
||||
@ -1060,7 +1074,7 @@ static const X86EvexToVexCompressTableEntry
|
||||
{ X86::VPSLLVQZ256rr , X86::VPSLLVQYrr },
|
||||
{ X86::VPSLLWZ256ri , X86::VPSLLWYri },
|
||||
{ X86::VPSLLWZ256rm , X86::VPSLLWYrm },
|
||||
{ X86::VPSLLWZ256rr , X86::VPSLLWYrr },
|
||||
{ X86::VPSLLWZ256rr , X86::VPSLLWYrr },
|
||||
{ X86::VPSRADZ256ri , X86::VPSRADYri },
|
||||
{ X86::VPSRADZ256rm , X86::VPSRADYrm },
|
||||
{ X86::VPSRADZ256rr , X86::VPSRADYrr },
|
||||
@ -1072,7 +1086,7 @@ static const X86EvexToVexCompressTableEntry
|
||||
{ X86::VPSRLDQZ256rr , X86::VPSRLDQYri },
|
||||
{ X86::VPSRLDZ256ri , X86::VPSRLDYri },
|
||||
{ X86::VPSRLDZ256rm , X86::VPSRLDYrm },
|
||||
{ X86::VPSRLDZ256rr , X86::VPSRLDYrr },
|
||||
{ X86::VPSRLDZ256rr , X86::VPSRLDYrr },
|
||||
{ X86::VPSRLQZ256ri , X86::VPSRLQYri },
|
||||
{ X86::VPSRLQZ256rm , X86::VPSRLQYrm },
|
||||
{ X86::VPSRLQZ256rr , X86::VPSRLQYrr },
|
||||
@ -1145,4 +1159,4 @@ static const X86EvexToVexCompressTableEntry
|
||||
{ X86::VXORPSZ256rr , X86::VXORPSYrr },
|
||||
};
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
@ -34,7 +34,7 @@ enum IntrinsicType : uint16_t {
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM,
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, BRCST32x2_TO_VEC,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
EXPAND_FROM_MEM, INSERT_SUBVEC,
EXPAND_FROM_MEM,
TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
};
@ -795,30 +795,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VGETMANTS, 0),
X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM,
X86ISD::VGETMANTS, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf32x4_256, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf32x4_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf32x8_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf64x2_256, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf64x2_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf64x4_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_inserti32x4_256, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_inserti32x4_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_inserti32x8_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_inserti64x2_256, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_inserti64x2_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_inserti64x4_512, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK,
ISD::CTLZ, 0),
X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256, INTR_TYPE_1OP_MASK,

@ -1115,56 +1115,6 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLo
OutStreamer->EmitInstruction(TC, getSubtargetInfo());
}

void X86AsmPrinter::EmitXRayTable() {
if (Sleds.empty())
return;

auto PrevSection = OutStreamer->getCurrentSectionOnly();
auto Fn = MF->getFunction();
MCSection *Section = nullptr;
if (Subtarget->isTargetELF()) {
if (Fn->hasComdat()) {
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
Fn->getComdat()->getName());
} else {
Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC);
}
} else if (Subtarget->isTargetMachO()) {
Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
SectionKind::getReadOnlyWithRel());
} else {
llvm_unreachable("Unsupported target");
}

// Before we switch over, we force a reference to a label inside the
// xray_instr_map section. Since EmitXRayTable() is always called just
// before the function's end, we assume that this is happening after the
// last return instruction.
//
// We then align the reference to 16 byte boundaries, which we determined
// experimentally to be beneficial to avoid causing decoder stalls.
MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true);
OutStreamer->EmitCodeAlignment(16);
OutStreamer->EmitSymbolValue(Tmp, 8, false);
OutStreamer->SwitchSection(Section);
OutStreamer->EmitLabel(Tmp);
for (const auto &Sled : Sleds) {
OutStreamer->EmitSymbolValue(Sled.Sled, 8);
OutStreamer->EmitSymbolValue(CurrentFnSym, 8);
auto Kind = static_cast<uint8_t>(Sled.Kind);
OutStreamer->EmitBytes(
StringRef(reinterpret_cast<const char *>(&Kind), 1));
OutStreamer->EmitBytes(
StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
OutStreamer->EmitZeros(14);
}
OutStreamer->SwitchSection(PrevSection);

Sleds.clear();
}
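
For reference, the emitter deleted here (its role is taken over by the common emitXRayTable() called above) wrote fixed 32-byte sled records. A sketch of that layout, with illustrative field names rather than ones from the XRay headers:

#include <cstdint>

struct XRaySledRecord {
  uint64_t SledAddress;      // EmitSymbolValue(Sled.Sled, 8)
  uint64_t FunctionAddress;  // EmitSymbolValue(CurrentFnSym, 8)
  uint8_t Kind;              // one byte for the sled kind
  uint8_t AlwaysInstrument;  // one byte for the always-instrument flag
  uint8_t Padding[14];       // EmitZeros(14)
};
static_assert(sizeof(XRaySledRecord) == 32,
              "matches the 32-byte records the emitter wrote");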

// Returns instruction preceding MBBI in MachineFunction.
// If MBBI is the first instruction of the first basic block, returns null.
static MachineBasicBlock::const_iterator

@ -598,197 +598,135 @@ int X86TTIImpl::getArithmeticInstrCost(

int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                               Type *SubTp) {

  if (Kind == TTI::SK_Reverse) {
    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

    static const CostTblEntry AVX512VBMIShuffleTbl[] = {
      { ISD::VECTOR_SHUFFLE, MVT::v64i8, 1 }, // vpermb
      { ISD::VECTOR_SHUFFLE, MVT::v32i8, 1 }  // vpermb
    };

    if (ST->hasVBMI())
      if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl,
                                              ISD::VECTOR_SHUFFLE, LT.second))
        return LT.first * Entry->Cost;

    static const CostTblEntry AVX512BWShuffleTbl[] = {
      { ISD::VECTOR_SHUFFLE, MVT::v32i16, 1 }, // vpermw
      { ISD::VECTOR_SHUFFLE, MVT::v16i16, 1 }, // vpermw
      { ISD::VECTOR_SHUFFLE, MVT::v64i8, 6 }   // vextracti64x4 + 2*vperm2i128
                                               // + 2*pshufb + vinserti64x4
    };

    if (ST->hasBWI())
      if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl,
                                              ISD::VECTOR_SHUFFLE, LT.second))
        return LT.first * Entry->Cost;

    static const CostTblEntry AVX512ShuffleTbl[] = {
      { ISD::VECTOR_SHUFFLE, MVT::v8f64, 1 },  // vpermpd
      { ISD::VECTOR_SHUFFLE, MVT::v16f32, 1 }, // vpermps
      { ISD::VECTOR_SHUFFLE, MVT::v8i64, 1 },  // vpermq
      { ISD::VECTOR_SHUFFLE, MVT::v16i32, 1 }, // vpermd
    };

    if (ST->hasAVX512())
      if (const auto *Entry =
              CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
        return LT.first * Entry->Cost;

    static const CostTblEntry AVX2ShuffleTbl[] = {
      { ISD::VECTOR_SHUFFLE, MVT::v4f64, 1 },  // vpermpd
      { ISD::VECTOR_SHUFFLE, MVT::v8f32, 1 },  // vpermps
      { ISD::VECTOR_SHUFFLE, MVT::v4i64, 1 },  // vpermq
      { ISD::VECTOR_SHUFFLE, MVT::v8i32, 1 },  // vpermd
      { ISD::VECTOR_SHUFFLE, MVT::v16i16, 2 }, // vperm2i128 + pshufb
      { ISD::VECTOR_SHUFFLE, MVT::v32i8, 2 }   // vperm2i128 + pshufb
    };

    if (ST->hasAVX2())
      if (const auto *Entry =
              CostTableLookup(AVX2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
        return LT.first * Entry->Cost;

    static const CostTblEntry AVX1ShuffleTbl[] = {
      { ISD::VECTOR_SHUFFLE, MVT::v4f64, 2 },  // vperm2f128 + vpermilpd
      { ISD::VECTOR_SHUFFLE, MVT::v8f32, 2 },  // vperm2f128 + vpermilps
      { ISD::VECTOR_SHUFFLE, MVT::v4i64, 2 },  // vperm2f128 + vpermilpd
      { ISD::VECTOR_SHUFFLE, MVT::v8i32, 2 },  // vperm2f128 + vpermilps
      { ISD::VECTOR_SHUFFLE, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
                                               // + vinsertf128
      { ISD::VECTOR_SHUFFLE, MVT::v32i8, 4 }   // vextractf128 + 2*pshufb
                                               // + vinsertf128
    };

    if (ST->hasAVX())
      if (const auto *Entry =
              CostTableLookup(AVX1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
        return LT.first * Entry->Cost;

    static const CostTblEntry SSSE3ShuffleTbl[] = {
      { ISD::VECTOR_SHUFFLE, MVT::v8i16, 1 }, // pshufb
      { ISD::VECTOR_SHUFFLE, MVT::v16i8, 1 }  // pshufb
    };

    if (ST->hasSSSE3())
      if (const auto *Entry =
              CostTableLookup(SSSE3ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
        return LT.first * Entry->Cost;

    static const CostTblEntry SSE2ShuffleTbl[] = {
      { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 }, // shufpd
      { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 }, // pshufd
      { ISD::VECTOR_SHUFFLE, MVT::v4i32, 1 }, // pshufd
      { ISD::VECTOR_SHUFFLE, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
      { ISD::VECTOR_SHUFFLE, MVT::v16i8, 9 }  // 2*pshuflw + 2*pshufhw
                                              // + 2*pshufd + 2*unpck + packus
    };

    if (ST->hasSSE2())
      if (const auto *Entry =
              CostTableLookup(SSE2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
        return LT.first * Entry->Cost;

    static const CostTblEntry SSE1ShuffleTbl[] = {
      { ISD::VECTOR_SHUFFLE, MVT::v4f32, 1 }, // shufps
    };

    if (ST->hasSSE1())
      if (const auto *Entry =
              CostTableLookup(SSE1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
        return LT.first * Entry->Cost;

  } else if (Kind == TTI::SK_Alternate) {
  if (Kind == TTI::SK_Reverse || Kind == TTI::SK_Alternate) {
    // 64-bit packed float vectors (v2f32) are widened to type v4f32.
    // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

    // The backend knows how to generate a single VEX.256 version of
    // instruction VPBLENDW if the target supports AVX2.
    if (ST->hasAVX2() && LT.second == MVT::v16i16)
      return LT.first;
    static const CostTblEntry AVX512VBMIShuffleTbl[] = {
      { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb
      { TTI::SK_Reverse, MVT::v32i8, 1 }  // vpermb
    };

    static const CostTblEntry AVXAltShuffleTbl[] = {
      {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd
      {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd
    if (ST->hasVBMI())
      if (const auto *Entry =
              CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second))
        return LT.first * Entry->Cost;

      {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vblendps
      {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vblendps
    static const CostTblEntry AVX512BWShuffleTbl[] = {
      { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
      { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
      { TTI::SK_Reverse, MVT::v64i8, 6 }   // vextracti64x4 + 2*vperm2i128
                                           // + 2*pshufb + vinserti64x4
    };

      // This shuffle is custom lowered into a sequence of:
      // 2x vextractf128 , 2x vpblendw , 1x vinsertf128
      {ISD::VECTOR_SHUFFLE, MVT::v16i16, 5},
    if (ST->hasBWI())
      if (const auto *Entry =
              CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second))
        return LT.first * Entry->Cost;

      // This shuffle is custom lowered into a long sequence of:
      // 2x vextractf128 , 4x vpshufb , 2x vpor , 1x vinsertf128
      {ISD::VECTOR_SHUFFLE, MVT::v32i8, 9}
    static const CostTblEntry AVX512ShuffleTbl[] = {
      { TTI::SK_Reverse, MVT::v8f64, 1 },  // vpermpd
      { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps
      { TTI::SK_Reverse, MVT::v8i64, 1 },  // vpermq
      { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd
    };

    if (ST->hasAVX512())
      if (const auto *Entry =
              CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
        return LT.first * Entry->Cost;

    static const CostTblEntry AVX2ShuffleTbl[] = {
      { TTI::SK_Reverse, MVT::v4f64, 1 },  // vpermpd
      { TTI::SK_Reverse, MVT::v8f32, 1 },  // vpermps
      { TTI::SK_Reverse, MVT::v4i64, 1 },  // vpermq
      { TTI::SK_Reverse, MVT::v8i32, 1 },  // vpermd
      { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb
      { TTI::SK_Reverse, MVT::v32i8, 2 },  // vperm2i128 + pshufb

      { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
      { TTI::SK_Alternate, MVT::v32i8, 1 }   // vpblendvb
    };

    if (ST->hasAVX2())
      if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
        return LT.first * Entry->Cost;

    static const CostTblEntry AVX1ShuffleTbl[] = {
      { TTI::SK_Reverse, MVT::v4f64, 2 },  // vperm2f128 + vpermilpd
      { TTI::SK_Reverse, MVT::v8f32, 2 },  // vperm2f128 + vpermilps
      { TTI::SK_Reverse, MVT::v4i64, 2 },  // vperm2f128 + vpermilpd
      { TTI::SK_Reverse, MVT::v8i32, 2 },  // vperm2f128 + vpermilps
      { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
                                           // + vinsertf128
      { TTI::SK_Reverse, MVT::v32i8, 4 },  // vextractf128 + 2*pshufb
                                           // + vinsertf128

      { TTI::SK_Alternate, MVT::v4i64, 1 },  // vblendpd
      { TTI::SK_Alternate, MVT::v4f64, 1 },  // vblendpd
      { TTI::SK_Alternate, MVT::v8i32, 1 },  // vblendps
      { TTI::SK_Alternate, MVT::v8f32, 1 },  // vblendps
      { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor
      { TTI::SK_Alternate, MVT::v32i8, 3 }   // vpand + vpandn + vpor
    };

    if (ST->hasAVX())
      if (const auto *Entry = CostTableLookup(AVXAltShuffleTbl,
                                              ISD::VECTOR_SHUFFLE, LT.second))
      if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
        return LT.first * Entry->Cost;

    static const CostTblEntry SSE41AltShuffleTbl[] = {
      // These are lowered into movsd.
      {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
      {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},

      // packed float vectors with four elements are lowered into BLENDI dag
      // nodes. A v4i32/v4f32 BLENDI generates a single 'blendps'/'blendpd'.
      {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
      {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},

      // This shuffle generates a single pshufw.
      {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},

      // There is no instruction that matches a v16i8 alternate shuffle.
      // The backend will expand it into the sequence 'pshufb + pshufb + or'.
      {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3}
    static const CostTblEntry SSE41ShuffleTbl[] = {
      { TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw
      { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
      { TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw
      { TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps
      { TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw
      { TTI::SK_Alternate, MVT::v16i8, 1 }  // pblendvb
    };

    if (ST->hasSSE41())
      if (const auto *Entry = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE,
                                              LT.second))
      if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
        return LT.first * Entry->Cost;

    static const CostTblEntry SSSE3AltShuffleTbl[] = {
      {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
      {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
    static const CostTblEntry SSSE3ShuffleTbl[] = {
      { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb
      { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb

      // SSE3 doesn't have 'blendps'. The following shuffles are expanded into
      // the sequence 'shufps + pshufd'
      {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
      {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},

      {ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or
      {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3}  // pshufb + pshufb + or
      { TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por
      { TTI::SK_Alternate, MVT::v16i8, 3 }  // pshufb + pshufb + por
    };

    if (ST->hasSSSE3())
      if (const auto *Entry = CostTableLookup(SSSE3AltShuffleTbl,
                                              ISD::VECTOR_SHUFFLE, LT.second))
      if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
        return LT.first * Entry->Cost;

    static const CostTblEntry SSEAltShuffleTbl[] = {
      {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
      {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
    static const CostTblEntry SSE2ShuffleTbl[] = {
      { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd
      { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd
      { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd
      { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
      { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw
                                          // + 2*pshufd + 2*unpck + packus

      {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd
      {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd

      // This is expanded into a long sequence of four extract + four insert.
      {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw.

      // 8 x (pinsrw + pextrw + and + movb + movzb + or)
      {ISD::VECTOR_SHUFFLE, MVT::v16i8, 48}
      { TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd
      { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
      { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps
      { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por
      { TTI::SK_Alternate, MVT::v16i8, 3 }  // pand + pandn + por
    };

    // Fall-back (SSE3 and SSE2).
    if (const auto *Entry = CostTableLookup(SSEAltShuffleTbl,
                                            ISD::VECTOR_SHUFFLE, LT.second))
      return LT.first * Entry->Cost;
    if (ST->hasSSE2())
      if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
        return LT.first * Entry->Cost;

    static const CostTblEntry SSE1ShuffleTbl[] = {
      { TTI::SK_Reverse, MVT::v4f32, 1 },  // shufps
      { TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps
    };

    if (ST->hasSSE1())
      if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
        return LT.first * Entry->Cost;

  } else if (Kind == TTI::SK_PermuteTwoSrc) {
    // We assume that source and destination have the same vector type.
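For orientation, a scalar sketch of what the two shuffle kinds costed above actually compute; this is plain C++ illustrating the semantics, not LLVM code:

#include <array>
#include <cstdio>

int main() {
  std::array<int, 8> A{0, 1, 2, 3, 4, 5, 6, 7};
  std::array<int, 8> B{10, 11, 12, 13, 14, 15, 16, 17};

  std::array<int, 8> Rev, Alt;
  for (int i = 0; i < 8; ++i) {
    Rev[i] = A[7 - i];                   // TTI::SK_Reverse: one input, lanes flipped
    Alt[i] = (i % 2 == 0) ? A[i] : B[i]; // TTI::SK_Alternate: even lanes from A, odd from B
  }

  for (int v : Rev) printf("%d ", v); // 7 6 5 4 3 2 1 0
  printf("\n");
  for (int v : Alt) printf("%d ", v); // 0 11 2 13 4 15 6 17
  printf("\n");
}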
@ -1057,6 +1057,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
      // add(zext(xor i16 X, -32768), -32768) --> sext X
      return CastInst::Create(Instruction::SExt, X, LHS->getType());
    }

    if (Val->isNegative() &&
        match(LHS, m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C)))) &&
        Val->sge(-C->sext(Val->getBitWidth()))) {
      // (add (zext (add nuw X, C)), Val) -> (zext (add nuw X, C+Val))
      return CastInst::Create(
          Instruction::ZExt,
          Builder->CreateNUWAdd(
              X, Constant::getIntegerValue(X->getType(),
                                           *C + Val->trunc(C->getBitWidth()))),
          I.getType());
    }
  }

  // FIXME: Use the match above instead of dyn_cast to allow these transforms
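A quick scalar check of the fold's arithmetic under its guards (no unsigned wrap in the narrow add, Val negative with Val >= -C); the concrete values are illustrative:

#include <cstdint>
#include <cstdio>

int main() {
  uint8_t X = 30;
  uint8_t C = 100;   // add nuw X, C -> 130, no 8-bit wrap
  int16_t Val = -50; // negative, and Val >= -C so C + Val stays non-negative

  // add(zext(add nuw X, C), Val)
  int16_t Before = (int16_t)((uint16_t)(uint8_t)(X + C) + Val);
  // zext(add nuw X, C+Val)
  int16_t After = (int16_t)(uint16_t)(uint8_t)(X + (uint8_t)(C + Val));

  printf("%d == %d\n", Before, After); // 80 == 80
  return Before == After ? 0 : 1;
}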
@ -1581,6 +1581,62 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      return replaceInstUsesWith(*II, V);
    break;
  }
  case Intrinsic::fma:
  case Intrinsic::fmuladd: {
    Value *Src0 = II->getArgOperand(0);
    Value *Src1 = II->getArgOperand(1);

    // Canonicalize constants into the RHS.
    if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
      II->setArgOperand(0, Src1);
      II->setArgOperand(1, Src0);
      std::swap(Src0, Src1);
    }

    Value *LHS = nullptr;
    Value *RHS = nullptr;

    // fma fneg(x), fneg(y), z -> fma x, y, z
    if (match(Src0, m_FNeg(m_Value(LHS))) &&
        match(Src1, m_FNeg(m_Value(RHS)))) {
      CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
                                              {LHS, RHS, II->getArgOperand(2)});
      NewCall->takeName(II);
      NewCall->copyFastMathFlags(II);
      return replaceInstUsesWith(*II, NewCall);
    }

    // fma fabs(x), fabs(x), z -> fma x, x, z
    if (match(Src0, m_Intrinsic<Intrinsic::fabs>(m_Value(LHS))) &&
        match(Src1, m_Intrinsic<Intrinsic::fabs>(m_Value(RHS))) && LHS == RHS) {
      CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
                                              {LHS, LHS, II->getArgOperand(2)});
      NewCall->takeName(II);
      NewCall->copyFastMathFlags(II);
      return replaceInstUsesWith(*II, NewCall);
    }

    // fma x, 1, z -> fadd x, z
    if (match(Src1, m_FPOne())) {
      Instruction *RI = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2));
      RI->copyFastMathFlags(II);
      return RI;
    }

    break;
  }
  case Intrinsic::fabs: {
    Value *Cond;
    Constant *LHS, *RHS;
    if (match(II->getArgOperand(0),
              m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
      CallInst *Call0 = Builder->CreateCall(II->getCalledFunction(), {LHS});
      CallInst *Call1 = Builder->CreateCall(II->getCalledFunction(), {RHS});
      return SelectInst::Create(Cond, Call0, Call1);
    }

    break;
  }
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
    // Turn PPC lvx -> load if the pointer is known aligned.
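The three folds rest on ordinary floating-point identities; a small check with std::fma (a sketch, not the InstCombine code itself):

#include <cmath>
#include <cstdio>

int main() {
  double x = 3.5, y = -2.0, z = 10.0;

  // fma fneg(x), fneg(y), z -> fma x, y, z: the two negations cancel.
  printf("%g == %g\n", std::fma(-x, -y, z), std::fma(x, y, z));

  // fma fabs(x), fabs(x), z -> fma x, x, z: x*x is non-negative either way.
  printf("%g == %g\n", std::fma(std::fabs(x), std::fabs(x), z),
         std::fma(x, x, z));

  // fma x, 1, z -> fadd x, z.
  printf("%g == %g\n", std::fma(x, 1.0, z), x + z);
}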
@ -2669,24 +2725,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {

    // assume( (load addr) != null ) -> add 'nonnull' metadata to load
    // (if assume is valid at the load)
    if (ICmpInst* ICmp = dyn_cast<ICmpInst>(IIOperand)) {
      Value *LHS = ICmp->getOperand(0);
      Value *RHS = ICmp->getOperand(1);
      if (ICmpInst::ICMP_NE == ICmp->getPredicate() &&
          isa<LoadInst>(LHS) &&
          isa<Constant>(RHS) &&
          RHS->getType()->isPointerTy() &&
          cast<Constant>(RHS)->isNullValue()) {
        LoadInst* LI = cast<LoadInst>(LHS);
        if (isValidAssumeForContext(II, LI, &DT)) {
          MDNode *MD = MDNode::get(II->getContext(), None);
          LI->setMetadata(LLVMContext::MD_nonnull, MD);
          return eraseInstFromFunction(*II);
        }
      }
    CmpInst::Predicate Pred;
    Instruction *LHS;
    if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
        Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
        LHS->getType()->isPointerTy() &&
        isValidAssumeForContext(II, LHS, &DT)) {
      MDNode *MD = MDNode::get(II->getContext(), None);
      LHS->setMetadata(LLVMContext::MD_nonnull, MD);
      return eraseInstFromFunction(*II);

      // TODO: apply nonnull return attributes to calls and invokes
      // TODO: apply range metadata for range check patterns?
    }

    // If there is a dominating assume with the same condition as this one,
    // then this one is redundant, and should be removed.
    APInt KnownZero(1, 0), KnownOne(1, 0);
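Roughly the source-level pattern the rewritten matcher recognizes; with Clang, __builtin_assume lowers to the llvm.assume(icmp ne ..., null) form handled above (a sketch):

int deref(int **pp) {
  int *p = *pp;                   // the load that can be tagged 'nonnull'
  __builtin_assume(p != nullptr); // becomes assume(icmp ne (load ...), null)
  return *p;                      // now known to be a non-null dereference
}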
@ -850,20 +850,10 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  // separated by a few arithmetic operations.
  BasicBlock::iterator BBI(LI);
  bool IsLoadCSE = false;
  if (Value *AvailableVal =
          FindAvailableLoadedValue(&LI, LI.getParent(), BBI,
                                   DefMaxInstsToScan, AA, &IsLoadCSE)) {
    if (IsLoadCSE) {
      LoadInst *NLI = cast<LoadInst>(AvailableVal);
      unsigned KnownIDs[] = {
          LLVMContext::MD_tbaa,            LLVMContext::MD_alias_scope,
          LLVMContext::MD_noalias,         LLVMContext::MD_range,
          LLVMContext::MD_invariant_load,  LLVMContext::MD_nonnull,
          LLVMContext::MD_invariant_group, LLVMContext::MD_align,
          LLVMContext::MD_dereferenceable,
          LLVMContext::MD_dereferenceable_or_null};
      combineMetadata(NLI, &LI, KnownIDs);
    };
  if (Value *AvailableVal = FindAvailableLoadedValue(
          &LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) {
    if (IsLoadCSE)
      combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI);

    return replaceInstUsesWith(
        LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(),
@ -731,6 +731,25 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) {
    unsigned ShAmt = Op1C->getZExtValue();

    // Turn:
    //  %zext = zext i32 %V to i64
    //  %res = shl i64 %zext, 8
    //
    // Into:
    //  %shl = shl i32 %V, 8
    //  %res = zext i32 %shl to i64
    //
    // This is only valid if %V would have zeros shifted out.
    if (auto *ZI = dyn_cast<ZExtInst>(I.getOperand(0))) {
      unsigned SrcBitWidth = ZI->getSrcTy()->getScalarSizeInBits();
      if (ShAmt < SrcBitWidth &&
          MaskedValueIsZero(ZI->getOperand(0),
                            APInt::getHighBitsSet(SrcBitWidth, ShAmt), 0, &I)) {
        auto *Shl = Builder->CreateShl(ZI->getOperand(0), ShAmt);
        return new ZExtInst(Shl, I.getType());
      }
    }

    // If the shifted-out value is known-zero, then this is a NUW shift.
    if (!I.hasNoUnsignedWrap() &&
        MaskedValueIsZero(I.getOperand(0),
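The transform's arithmetic in plain C++: the narrow shift matches the wide one exactly when the top ShAmt bits of the source are already zero, so nothing is shifted out of the 32-bit value (values illustrative):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t V = 0x00ABCDEFu; // high 8 bits are zero
  unsigned ShAmt = 8;

  uint64_t WideShift = (uint64_t)V << ShAmt;     // shl (zext V to i64), 8
  uint64_t NarrowShift = (uint64_t)(V << ShAmt); // zext (shl V, 8) to i64

  printf("%llx == %llx\n", (unsigned long long)WideShift,
         (unsigned long long)NarrowShift); // abcdef00 == abcdef00
}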
@ -481,9 +481,9 @@ private:
  bool processNode(DomTreeNode *Node);

  Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
    if (auto *LI = dyn_cast<LoadInst>(Inst))
      return LI;
    else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
    if (auto *SI = dyn_cast<StoreInst>(Inst))
      return SI->getValueOperand();
    assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
    return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
@ -79,6 +79,7 @@ STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted");
STATISTIC(NumGVNBlocksDeleted, "Number of blocks deleted");
STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified");
STATISTIC(NumGVNPhisAllSame, "Number of PHIs whose arguments are all the same");
STATISTIC(NumGVNMaxIterations, "Maximum Number of iterations it took to converge GVN");

//===----------------------------------------------------------------------===//
//                                GVN Pass
@ -714,16 +715,15 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I,
  // Unlike loads, we never try to eliminate stores, so we do not check if they
  // are simple and avoid value numbering them.
  auto *SI = cast<StoreInst>(I);
  // If this store's memorydef stores the same value as the last store, the
  // memory accesses are equivalent.
  // Get the expression, if any, for the RHS of the MemoryDef.
  MemoryAccess *StoreAccess = MSSA->getMemoryAccess(SI);
  MemoryAccess *StoreRHS = lookupMemoryAccessEquiv(
      cast<MemoryDef>(StoreAccess)->getDefiningAccess());
  const Expression *OldStore = createStoreExpression(SI, StoreRHS, B);
  // See if this store expression already has a value, and it's the same as our
  // current store. FIXME: Right now, we only do this for simple stores.
  // See if we are defined by a previous store expression, it already has a
  // value, and it's the same value as our current store. FIXME: Right now, we
  // only do this for simple stores, we should expand to cover memcpys, etc.
  if (SI->isSimple()) {
    // Get the expression, if any, for the RHS of the MemoryDef.
    MemoryAccess *StoreRHS = lookupMemoryAccessEquiv(
        cast<MemoryDef>(StoreAccess)->getDefiningAccess());
    const Expression *OldStore = createStoreExpression(SI, StoreRHS, B);
    CongruenceClass *CC = ExpressionToClass.lookup(OldStore);
    if (CC && CC->DefiningExpr && isa<StoreExpression>(CC->DefiningExpr) &&
        CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B))
@ -1092,23 +1092,16 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
  if (auto *I = dyn_cast<Instruction>(V)) {
    if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
      // If this is a MemoryDef, we need to update the equivalence table. If
      // we
      // determined the expression is congruent to a different memory state,
      // use that different memory state. If we determined it didn't, we
      // update
      // that as well. Note that currently, we do not guarantee the
      // "different" memory state dominates us. The goal is to make things
      // that are congruent look congruent, not ensure we can eliminate one in
      // favor of the other.
      // Right now, the only way they can be equivalent is for store
      // expresions.
      if (!isa<MemoryUse>(MA)) {
        if (E && isa<StoreExpression>(E) && EClass->Members.size() != 1) {
          auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess();
          setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr);
        } else {
          setMemoryAccessEquivTo(MA, nullptr);
        }
      // we determined the expression is congruent to a different memory
      // state, use that different memory state. If we determined it didn't,
      // we update that as well. Right now, we only support store
      // expressions.
      if (!isa<MemoryUse>(MA) && isa<StoreExpression>(E) &&
          EClass->Members.size() != 1) {
        auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess();
        setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr);
      } else {
        setMemoryAccessEquivTo(MA, nullptr);
      }
      markMemoryUsersTouched(MA);
    }
@ -1391,7 +1384,7 @@ void NewGVN::valueNumberInstruction(Instruction *I) {
  } else {
    // Handle terminators that return values. All of them produce values we
    // don't currently understand.
    if (!I->getType()->isVoidTy()){
    if (!I->getType()->isVoidTy()) {
      auto *Symbolized = createUnknownExpression(I);
      performCongruenceFinding(I, Symbolized);
    }
@ -1427,14 +1420,12 @@ void NewGVN::verifyMemoryCongruency() {
      continue;
    if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) {
      auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second);
      if (FirstMUD && SecondMUD) {
        auto *FirstInst = FirstMUD->getMemoryInst();
        auto *SecondInst = SecondMUD->getMemoryInst();
      if (FirstMUD && SecondMUD)
        assert(
            ValueToClass.lookup(FirstInst) == ValueToClass.lookup(SecondInst) &&
            ValueToClass.lookup(FirstMUD->getMemoryInst()) ==
                    ValueToClass.lookup(SecondMUD->getMemoryInst()) &&
            "The instructions for these memory operations should have been in "
            "the same congruence class");
      }
    } else if (auto *FirstMP = dyn_cast<MemoryPhi>(KV.first)) {

      // We can only sanely verify that MemoryDefs in the operand list all have
@ -1538,9 +1529,11 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,

  initializeCongruenceClasses(F);

  unsigned int Iterations = 0;
  // We start out in the entry block.
  BasicBlock *LastBlock = &F.getEntryBlock();
  while (TouchedInstructions.any()) {
    ++Iterations;
    // Walk through all the instructions in all the blocks in RPO.
    for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1;
         InstrNum = TouchedInstructions.find_next(InstrNum)) {

@ -1587,8 +1580,7 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
      TouchedInstructions.reset(InstrNum);
    }
  }

  // FIXME: Move this to expensive checks when we are satisfied with NewGVN
  NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations);
#ifndef NDEBUG
  verifyMemoryCongruency();
#endif
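The driver above is a standard fixpoint iteration over a touched-instruction set; a generic sketch of the pattern, independent of NewGVN's data structures (all names here are illustrative, not LLVM API):

#include <algorithm>
#include <vector>

// Keep revisiting touched items until nothing remains touched; track the
// iteration count the way NumGVNMaxIterations does above.
template <typename ProcessFn>
unsigned runToFixpoint(std::vector<bool> &Touched, ProcessFn Process) {
  unsigned Iterations = 0;
  while (std::find(Touched.begin(), Touched.end(), true) != Touched.end()) {
    ++Iterations;
    for (size_t i = 0; i < Touched.size(); ++i) {
      if (!Touched[i])
        continue;
      Process(i, Touched); // may set other indices back to 'touched'
      Touched[i] = false;
    }
  }
  return Iterations;
}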
@ -2070,7 +2062,7 @@ bool NewGVN::eliminateInstructions(Function &F) {

    // Cleanup the congruence class.
    SmallPtrSet<Value *, 4> MembersLeft;
    for (Value * Member : CC->Members) {
    for (Value *Member : CC->Members) {
      if (Member->getType()->isVoidTy()) {
        MembersLeft.insert(Member);
        continue;
@ -760,7 +760,7 @@ static void PropagateParallelLoopAccessMetadata(CallSite CS,

/// When inlining a function that contains noalias scope metadata,
/// this metadata needs to be cloned so that the inlined blocks
/// have different "unqiue scopes" at every call site. Were this not done, then
/// have different "unique scopes" at every call site. Were this not done, then
/// aliasing scopes from a function inlined into a caller multiple times could
/// not be differentiated (and this would lead to miscompiles because the
/// non-aliasing property communicated by the metadata could have
@ -335,10 +335,12 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
  unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);

  uint64_t TrueWeight, FalseWeight;
  uint64_t ExitWeight = 0, BackEdgeWeight = 0;
  uint64_t ExitWeight = 0, CurHeaderWeight = 0;
  if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
    ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
    BackEdgeWeight = HeaderIdx ? FalseWeight : TrueWeight;
    // The # of times the loop body executes is the sum of the exit block
    // weight and the # of times the backedges are taken.
    CurHeaderWeight = TrueWeight + FalseWeight;
  }

  // For each peeled-off iteration, make a copy of the loop.

@ -346,15 +348,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
    SmallVector<BasicBlock *, 8> NewBlocks;
    ValueToValueMapTy VMap;

    // The exit weight of the previous iteration is the header entry weight
    // of the current iteration. So this is exactly how many dynamic iterations
    // the current peeled-off static iteration uses up.
    // Subtract the exit weight from the current header weight -- the exit
    // weight is exactly the weight of the previous iteration's header.
    // FIXME: due to the way the distribution is constructed, we need a
    // guard here to make sure we don't end up with non-positive weights.
    if (ExitWeight < BackEdgeWeight)
      BackEdgeWeight -= ExitWeight;
    if (ExitWeight < CurHeaderWeight)
      CurHeaderWeight -= ExitWeight;
    else
      BackEdgeWeight = 1;
      CurHeaderWeight = 1;

    cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit,
                    NewBlocks, LoopBlocks, VMap, LVMap, LI);

@ -388,6 +389,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,

  // Adjust the branch weights on the loop exit.
  if (ExitWeight) {
    // The backedge count is the difference of current header weight and
    // current loop exit weight. If the current header weight is smaller than
    // the current loop exit weight, we mark the loop backedge weight as 1.
    uint64_t BackEdgeWeight = 0;
    if (ExitWeight < CurHeaderWeight)
      BackEdgeWeight = CurHeaderWeight - ExitWeight;
    else
      BackEdgeWeight = 1;
    MDBuilder MDB(LatchBR->getContext());
    MDNode *WeightNode =
        HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
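A worked example of the weight bookkeeping with illustrative profile counts: each peeled iteration consumes one exit-weight's worth of header entries, and the remaining loop's backedge weight is the difference, guarded so it stays positive:

#include <cstdint>
#include <cstdio>

int main() {
  // Illustrative profile: the latch exits 100 times and loops back 900 times.
  uint64_t ExitWeight = 100;
  uint64_t CurHeaderWeight = 100 + 900; // TrueWeight + FalseWeight = 1000

  // Peel two iterations; each subtracts the exit weight from the header.
  for (int Iter = 0; Iter < 2; ++Iter) {
    if (ExitWeight < CurHeaderWeight)
      CurHeaderWeight -= ExitWeight; // 1000 -> 900 -> 800
    else
      CurHeaderWeight = 1;           // guard against non-positive weights
  }

  // Remaining loop: backedge weight is what is left above the exit weight.
  uint64_t BackEdgeWeight =
      ExitWeight < CurHeaderWeight ? CurHeaderWeight - ExitWeight : 1;
  printf("backedge=%llu exit=%llu\n", (unsigned long long)BackEdgeWeight,
         (unsigned long long)ExitWeight); // backedge=700 exit=100
}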
@ -1574,12 +1574,20 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
    I0->getOperandUse(O).set(NewOperands[O]);
  I0->moveBefore(&*BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags.
  // The debug location for the "common" instruction is the merged locations of
  // all the commoned instructions. We start with the original location of the
  // "common" instruction and iteratively merge each location in the loop below.
  DILocation *Loc = I0->getDebugLoc();

  // Update metadata and IR flags, and merge debug locations.
  for (auto *I : Insts)
    if (I != I0) {
      Loc = DILocation::getMergedLocation(Loc, I->getDebugLoc());
      combineMetadataForCSE(I0, I);
      I0->andIRFlags(I);
    }
  if (!isa<CallInst>(I0))
    I0->setDebugLoc(Loc);

  if (!isa<StoreInst>(I0)) {
    // canSinkLastInstruction checked that all instructions were used by
@ -2366,7 +2366,11 @@ enum CXCursorKind {
   */
  CXCursor_OMPTargetTeamsDistributeParallelForDirective = 277,

  CXCursor_LastStmt = CXCursor_OMPTargetTeamsDistributeParallelForDirective,
  /** \brief OpenMP target teams distribute parallel for simd directive.
   */
  CXCursor_OMPTargetTeamsDistributeParallelForSimdDirective = 278,

  CXCursor_LastStmt = CXCursor_OMPTargetTeamsDistributeParallelForSimdDirective,

  /**
   * \brief Cursor that represents the translation unit itself.

@ -2669,6 +2669,9 @@ DEF_TRAVERSE_STMT(OMPTargetTeamsDistributeDirective,
DEF_TRAVERSE_STMT(OMPTargetTeamsDistributeParallelForDirective,
                  { TRY_TO(TraverseOMPExecutableDirective(S)); })

DEF_TRAVERSE_STMT(OMPTargetTeamsDistributeParallelForSimdDirective,
                  { TRY_TO(TraverseOMPExecutableDirective(S)); })

// OpenMP clauses.
template <typename Derived>
bool RecursiveASTVisitor<Derived>::TraverseOMPClause(OMPClause *C) {
@ -3638,6 +3638,79 @@ public:
  }
};

/// This represents '#pragma omp target teams distribute parallel for simd'
/// combined directive.
///
/// \code
/// #pragma omp target teams distribute parallel for simd private(x)
/// \endcode
/// In this example directive '#pragma omp target teams distribute parallel
/// for simd' has clause 'private' with the variables 'x'
///
class OMPTargetTeamsDistributeParallelForSimdDirective final
    : public OMPLoopDirective {
  friend class ASTStmtReader;

  /// Build directive with the given start and end location.
  ///
  /// \param StartLoc Starting location of the directive kind.
  /// \param EndLoc Ending location of the directive.
  /// \param CollapsedNum Number of collapsed nested loops.
  /// \param NumClauses Number of clauses.
  ///
  OMPTargetTeamsDistributeParallelForSimdDirective(SourceLocation StartLoc,
                                                   SourceLocation EndLoc,
                                                   unsigned CollapsedNum,
                                                   unsigned NumClauses)
      : OMPLoopDirective(this,
                         OMPTargetTeamsDistributeParallelForSimdDirectiveClass,
                         OMPD_target_teams_distribute_parallel_for_simd,
                         StartLoc, EndLoc, CollapsedNum, NumClauses) {}

  /// Build an empty directive.
  ///
  /// \param CollapsedNum Number of collapsed nested loops.
  /// \param NumClauses Number of clauses.
  ///
  explicit OMPTargetTeamsDistributeParallelForSimdDirective(
      unsigned CollapsedNum, unsigned NumClauses)
      : OMPLoopDirective(
            this, OMPTargetTeamsDistributeParallelForSimdDirectiveClass,
            OMPD_target_teams_distribute_parallel_for_simd, SourceLocation(),
            SourceLocation(), CollapsedNum, NumClauses) {}

public:
  /// Creates directive with a list of \a Clauses.
  ///
  /// \param C AST context.
  /// \param StartLoc Starting location of the directive kind.
  /// \param EndLoc Ending Location of the directive.
  /// \param CollapsedNum Number of collapsed loops.
  /// \param Clauses List of clauses.
  /// \param AssociatedStmt Statement, associated with the directive.
  /// \param Exprs Helper expressions for CodeGen.
  ///
  static OMPTargetTeamsDistributeParallelForSimdDirective *
  Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
         unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
         Stmt *AssociatedStmt, const HelperExprs &Exprs);

  /// Creates an empty directive with the place for \a NumClauses clauses.
  ///
  /// \param C AST context.
  /// \param CollapsedNum Number of collapsed nested loops.
  /// \param NumClauses Number of clauses.
  ///
  static OMPTargetTeamsDistributeParallelForSimdDirective *
  CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum,
              EmptyShell);

  static bool classof(const Stmt *T) {
    return T->getStmtClass() ==
           OMPTargetTeamsDistributeParallelForSimdDirectiveClass;
  }
};

} // end namespace clang

#endif
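For reference, the source construct the new AST node models; this assumes an OpenMP-4.5-capable build (e.g. clang -fopenmp with target offloading configured):

#include <cstdio>

int main() {
  const int N = 1024;
  float x[N];

  // The combined directive: offload to a target device, spawn teams,
  // distribute iterations across teams, run them in parallel within each
  // team, and vectorize each chunk with simd.
  #pragma omp target teams distribute parallel for simd map(tofrom: x[0:N])
  for (int i = 0; i < N; ++i)
    x[i] = i * 0.5f;

  printf("%f\n", x[N - 1]);
}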
@ -3921,6 +3921,8 @@ def ext_ms_deref_template_argument: ExtWarn<
def ext_ms_delayed_template_argument: ExtWarn<
  "using the undeclared type %0 as a default template argument is a "
  "Microsoft extension">, InGroup<MicrosoftTemplate>;
def err_template_arg_deduced_incomplete_pack : Error<
  "deduced incomplete pack %0 for template parameter %1">;

// C++ template specialization
def err_template_spec_unknown_kind : Error<
@ -162,6 +162,9 @@
#ifndef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE
#define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(Name)
#endif
#ifndef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE
#define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(Name)
#endif

// OpenMP directives.
OPENMP_DIRECTIVE(threadprivate)

@ -214,6 +217,7 @@ OPENMP_DIRECTIVE_EXT(teams_distribute_parallel_for, "teams distribute parallel f
OPENMP_DIRECTIVE_EXT(target_teams, "target teams")
OPENMP_DIRECTIVE_EXT(target_teams_distribute, "target teams distribute")
OPENMP_DIRECTIVE_EXT(target_teams_distribute_parallel_for, "target teams distribute parallel for")
OPENMP_DIRECTIVE_EXT(target_teams_distribute_parallel_for_simd, "target teams distribute parallel for simd")

// OpenMP clauses.
OPENMP_CLAUSE(if, OMPIfClause)

@ -793,6 +797,33 @@ OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(proc_bind)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(schedule)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(linear)

// Clauses allowed for OpenMP directive
// 'target teams distribute parallel for simd'.
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(if)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(device)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(map)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(private)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(nowait)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(depend)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(defaultmap)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(firstprivate)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(is_device_ptr)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(default)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(shared)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(reduction)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(num_teams)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(thread_limit)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(lastprivate)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(collapse)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(dist_schedule)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(num_threads)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(proc_bind)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(schedule)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(linear)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(aligned)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(safelen)
OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(simdlen)

#undef OPENMP_TASKLOOP_SIMD_CLAUSE
#undef OPENMP_TASKLOOP_CLAUSE
#undef OPENMP_LINEAR_KIND

@ -843,3 +874,4 @@ OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(linear)
#undef OPENMP_TARGET_TEAMS_CLAUSE
#undef OPENMP_TARGET_TEAMS_DISTRIBUTE_CLAUSE
#undef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE
#undef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE

@ -243,3 +243,4 @@ def OMPTeamsDistributeParallelForDirective : DStmt<OMPLoopDirective>;
def OMPTargetTeamsDirective : DStmt<OMPExecutableDirective>;
def OMPTargetTeamsDistributeDirective : DStmt<OMPLoopDirective>;
def OMPTargetTeamsDistributeParallelForDirective : DStmt<OMPLoopDirective>;
def OMPTargetTeamsDistributeParallelForSimdDirective : DStmt<OMPLoopDirective>;
@ -882,7 +882,7 @@ def fms_compatibility_version
                               "(default))">;
def fdelayed_template_parsing : Flag<["-"], "fdelayed-template-parsing">, Group<f_Group>,
  HelpText<"Parse templated function definitions at the end of the "
           "translation unit">, Flags<[CC1Option]>;
           "translation unit">, Flags<[CC1Option, CoreOption]>;
def fms_memptr_rep_EQ : Joined<["-"], "fms-memptr-rep=">, Group<f_Group>, Flags<[CC1Option]>;
def fmodules_cache_path : Joined<["-"], "fmodules-cache-path=">, Group<i_Group>,
  Flags<[DriverOption, CC1Option]>, MetaVarName<"<directory>">,

@ -1031,7 +1031,8 @@ def fno_ms_extensions : Flag<["-"], "fno-ms-extensions">, Group<f_Group>,
  Flags<[CoreOption]>;
def fno_ms_compatibility : Flag<["-"], "fno-ms-compatibility">, Group<f_Group>,
  Flags<[CoreOption]>;
def fno_delayed_template_parsing : Flag<["-"], "fno-delayed-template-parsing">, Group<f_Group>;
def fno_delayed_template_parsing : Flag<["-"], "fno-delayed-template-parsing">, Group<f_Group>,
  Flags<[DriverOption, CoreOption]>;
def fno_objc_exceptions: Flag<["-"], "fno-objc-exceptions">, Group<f_Group>;
def fno_objc_legacy_dispatch : Flag<["-"], "fno-objc-legacy-dispatch">, Group<f_Group>;
def fno_objc_weak : Flag<["-"], "fno-objc-weak">, Group<f_Group>, Flags<[CC1Option]>;

@ -1331,6 +1332,12 @@ def funique_section_names : Flag <["-"], "funique-section-names">,
def fno_unique_section_names : Flag <["-"], "fno-unique-section-names">,
  Group<f_Group>, Flags<[CC1Option]>;

def fstrict_return : Flag<["-"], "fstrict-return">, Group<f_Group>,
  Flags<[CC1Option]>,
  HelpText<"Always treat control flow paths that fall off the end of a non-void "
           "function as unreachable">;
def fno_strict_return : Flag<["-"], "fno-strict-return">, Group<f_Group>,
  Flags<[CC1Option]>;

def fdebug_types_section: Flag <["-"], "fdebug-types-section">, Group<f_Group>,
  Flags<[CC1Option]>, HelpText<"Place debug types in their own section (ELF Only)">;

@ -251,6 +251,10 @@ CODEGENOPT(DiagnosticsWithHotness, 1, 0)
/// Whether copy relocations support is available when building as PIE.
CODEGENOPT(PIECopyRelocations, 1, 0)

/// Whether we should use the undefined behaviour optimization for control flow
/// paths that reach the end of a function without executing a required return.
CODEGENOPT(StrictReturn, 1, 1)

#undef CODEGENOPT
#undef ENUM_CODEGENOPT
#undef VALUE_CODEGENOPT
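What the new -fstrict-return / StrictReturn option is about, in source form; the function below is illustrative. With strict returns enabled, the fall-through path is treated as unreachable and optimized on that assumption:

// A control path that falls off the end of a non-void function is undefined
// behavior; -Wreturn-type flags it, and -fno-strict-return keeps the path
// from being folded away while debugging.
int sign(int x) {
  if (x > 0)
    return 1;
  if (x < 0)
    return -1;
  // sign(0) falls off the end here.
}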
@ -8514,6 +8514,12 @@ public:
      ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
      SourceLocation EndLoc,
      llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA);
  /// Called on well-formed '\#pragma omp target teams distribute parallel for
  /// simd' after parsing of the associated statement.
  StmtResult ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective(
      ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
      SourceLocation EndLoc,
      llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA);

  /// Checks correctness of linear modifiers.
  bool CheckOpenMPLinearModifier(OpenMPLinearClauseKind LinKind,

@ -1516,6 +1516,7 @@ namespace clang {
      STMT_OMP_TARGET_TEAMS_DIRECTIVE,
      STMT_OMP_TARGET_TEAMS_DISTRIBUTE_DIRECTIVE,
      STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE,
      STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_DIRECTIVE,
      EXPR_OMP_ARRAY_SECTION,

      // ARC
contrib/llvm/tools/clang/include/clang/Tooling/Core/Diagnostic.h (new file, 100 lines)
@ -0,0 +1,100 @@
//===--- Diagnostic.h - Framework for clang diagnostics tools --*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  \file
//  Structures supporting diagnostics and refactorings that span multiple
//  translation units. Indicate diagnostics reports and replacements
//  suggestions for the analyzed sources.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_TOOLING_CORE_DIAGNOSTIC_H
#define LLVM_CLANG_TOOLING_CORE_DIAGNOSTIC_H

#include "Replacement.h"
#include "clang/Basic/Diagnostic.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include <string>

namespace clang {
namespace tooling {

/// \brief Represents the diagnostic message with the error message associated
/// and the information on the location of the problem.
struct DiagnosticMessage {
  DiagnosticMessage(llvm::StringRef Message = "");

  /// \brief Constructs a diagnostic message with an offset to the diagnostic
  /// within the file where the problem occurred.
  ///
  /// \param Loc Should be a file location, it is not meaningful for a macro
  /// location.
  ///
  DiagnosticMessage(llvm::StringRef Message, const SourceManager &Sources,
                    SourceLocation Loc);
  std::string Message;
  std::string FilePath;
  unsigned FileOffset;
};

/// \brief Represents the diagnostic with the level of severity and possible
/// fixes to be applied.
struct Diagnostic {
  enum Level {
    Warning = DiagnosticsEngine::Warning,
    Error = DiagnosticsEngine::Error
  };

  Diagnostic() = default;

  Diagnostic(llvm::StringRef DiagnosticName, Level DiagLevel,
             StringRef BuildDirectory);

  Diagnostic(llvm::StringRef DiagnosticName, DiagnosticMessage &Message,
             llvm::StringMap<Replacements> &Fix,
             SmallVector<DiagnosticMessage, 1> &Notes, Level DiagLevel,
             llvm::StringRef BuildDirectory);

  /// \brief Name identifying the Diagnostic.
  std::string DiagnosticName;

  /// \brief Message associated to the diagnostic.
  DiagnosticMessage Message;

  /// \brief Fixes to apply, grouped by file path.
  llvm::StringMap<Replacements> Fix;

  /// \brief Potential notes about the diagnostic.
  SmallVector<DiagnosticMessage, 1> Notes;

  /// \brief Diagnostic level. Can indicate either an error or a warning.
  Level DiagLevel;

  /// \brief A build directory of the diagnostic source file.
  ///
  /// It's an absolute path which is `directory` field of the source file in
  /// compilation database. If users don't specify the compilation database
  /// directory, it is the current directory where clang-tidy runs.
  ///
  /// Note: it is empty in unittest.
  std::string BuildDirectory;
};

/// \brief Collection of Diagnostics generated from a single translation unit.
struct TranslationUnitDiagnostics {
  /// Name of the main source for the translation unit.
  std::string MainSourceFile;
  std::vector<Diagnostic> Diagnostics;
};

} // end namespace tooling
} // end namespace clang
#endif // LLVM_CLANG_TOOLING_CORE_DIAGNOSTIC_H
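A minimal sketch of populating the new structures by hand; real producers such as clang-tidy fill them from a DiagnosticsEngine, and the field values here are illustrative:

#include "clang/Tooling/Core/Diagnostic.h"

clang::tooling::TranslationUnitDiagnostics makeExample() {
  clang::tooling::Diagnostic D;
  D.DiagnosticName = "readability-example"; // hypothetical check name
  D.Message.Message = "something looks off here";
  D.Message.FilePath = "foo.cpp";
  D.Message.FileOffset = 42;
  D.DiagLevel = clang::tooling::Diagnostic::Warning;

  clang::tooling::TranslationUnitDiagnostics TUD;
  TUD.MainSourceFile = "foo.cpp";
  TUD.Diagnostics.push_back(D);
  return TUD;
}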
@ -329,12 +329,6 @@ llvm::Expected<std::string> applyAllReplacements(StringRef Code,
struct TranslationUnitReplacements {
  /// Name of the main source for the translation unit.
  std::string MainSourceFile;

  /// A freeform chunk of text to describe the context of the replacements.
  /// Will be printed, for example, when detecting conflicts during replacement
  /// deduplication.
  std::string Context;

  std::vector<Replacement> Replacements;
};
contrib/llvm/tools/clang/include/clang/Tooling/DiagnosticsYaml.h (new file, 101 lines)
@ -0,0 +1,101 @@
//===-- DiagnosticsYaml.h -- Serialization for Diagnostics ------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file defines the structure of a YAML document for serializing
/// diagnostics.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_TOOLING_DIAGNOSTICSYAML_H
#define LLVM_CLANG_TOOLING_DIAGNOSTICSYAML_H

#include "clang/Tooling/Core/Diagnostic.h"
#include "clang/Tooling/ReplacementsYaml.h"
#include "llvm/Support/YAMLTraits.h"
#include <string>

LLVM_YAML_IS_SEQUENCE_VECTOR(clang::tooling::Diagnostic)

namespace llvm {
namespace yaml {

template <> struct MappingTraits<clang::tooling::Diagnostic> {
  /// \brief Helper to (de)serialize a Diagnostic since we don't have direct
  /// access to its data members.
  class NormalizedDiagnostic {
  public:
    NormalizedDiagnostic(const IO &)
        : DiagLevel(clang::tooling::Diagnostic::Level::Warning) {}

    NormalizedDiagnostic(const IO &, const clang::tooling::Diagnostic &D)
        : DiagnosticName(D.DiagnosticName), Message(D.Message), Fix(D.Fix),
          Notes(D.Notes), DiagLevel(D.DiagLevel),
          BuildDirectory(D.BuildDirectory) {}

    clang::tooling::Diagnostic denormalize(const IO &) {
      return clang::tooling::Diagnostic(DiagnosticName, Message, Fix, Notes,
                                        DiagLevel, BuildDirectory);
    }

    std::string DiagnosticName;
    clang::tooling::DiagnosticMessage Message;
    llvm::StringMap<clang::tooling::Replacements> Fix;
    SmallVector<clang::tooling::DiagnosticMessage, 1> Notes;
    clang::tooling::Diagnostic::Level DiagLevel;
    std::string BuildDirectory;
  };

  static void mapping(IO &Io, clang::tooling::Diagnostic &D) {
    MappingNormalization<NormalizedDiagnostic, clang::tooling::Diagnostic> Keys(
        Io, D);
    Io.mapRequired("DiagnosticName", Keys->DiagnosticName);

    // FIXME: Export properly all the different fields.

    std::vector<clang::tooling::Replacement> Fixes;
    for (auto &Replacements : Keys->Fix) {
      for (auto &Replacement : Replacements.second) {
        Fixes.push_back(Replacement);
      }
    }
    Io.mapRequired("Replacements", Fixes);
    for (auto &Fix : Fixes) {
      llvm::Error Err = Keys->Fix[Fix.getFilePath()].add(Fix);
      if (Err) {
        // FIXME: Implement better conflict handling.
        llvm::errs() << "Fix conflicts with existing fix: "
                     << llvm::toString(std::move(Err)) << "\n";
      }
    }
  }
};

/// \brief Specialized MappingTraits to describe how a
/// TranslationUnitDiagnostics is (de)serialized.
template <> struct MappingTraits<clang::tooling::TranslationUnitDiagnostics> {
  static void mapping(IO &Io, clang::tooling::TranslationUnitDiagnostics &Doc) {
    Io.mapRequired("MainSourceFile", Doc.MainSourceFile);

    std::vector<clang::tooling::Diagnostic> Diagnostics;
    for (auto &Diagnostic : Doc.Diagnostics) {
      // FIXME: Export all diagnostics, not just the ones with fixes.
      // Update MappingTraits<clang::tooling::Diagnostic>::mapping.
      if (Diagnostic.Fix.size() > 0) {
        Diagnostics.push_back(Diagnostic);
      }
    }
    Io.mapRequired("Diagnostics", Diagnostics);
    Doc.Diagnostics = Diagnostics;
  }
};
} // end namespace yaml
} // end namespace llvm

#endif // LLVM_CLANG_TOOLING_DIAGNOSTICSYAML_H
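A sketch of emitting the YAML document these traits define, using llvm::yaml::Output; assumes a populated TranslationUnitDiagnostics:

#include "clang/Tooling/DiagnosticsYaml.h"
#include "llvm/Support/raw_ostream.h"

void writeDiagnostics(clang::tooling::TranslationUnitDiagnostics &TUD) {
  llvm::yaml::Output Out(llvm::outs());
  Out << TUD; // emits MainSourceFile plus the Diagnostics sequence
}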
@ -65,7 +65,6 @@ template <> struct MappingTraits<clang::tooling::TranslationUnitReplacements> {
  static void mapping(IO &Io,
                      clang::tooling::TranslationUnitReplacements &Doc) {
    Io.mapRequired("MainSourceFile", Doc.MainSourceFile);
    Io.mapOptional("Context", Doc.Context, std::string());
    Io.mapRequired("Replacements", Doc.Replacements);
  }
};
@ -7192,6 +7192,12 @@ static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc,
                               CharUnits &EndOffset) {
  bool DetermineForCompleteObject = refersToCompleteObject(LVal);

  auto CheckedHandleSizeof = [&](QualType Ty, CharUnits &Result) {
    if (Ty.isNull() || Ty->isIncompleteType() || Ty->isFunctionType())
      return false;
    return HandleSizeof(Info, ExprLoc, Ty, Result);
  };

  // We want to evaluate the size of the entire object. This is a valid fallback
  // for when Type=1 and the designator is invalid, because we're asked for an
  // upper-bound.

@ -7209,7 +7215,7 @@ static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc,
      return false;

    QualType BaseTy = getObjectType(LVal.getLValueBase());
    return !BaseTy.isNull() && HandleSizeof(Info, ExprLoc, BaseTy, EndOffset);
    return CheckedHandleSizeof(BaseTy, EndOffset);
  }

  // We want to evaluate the size of a subobject.

@ -7238,7 +7244,7 @@ static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc,
  }

  CharUnits BytesPerElem;
  if (!HandleSizeof(Info, ExprLoc, Designator.MostDerivedType, BytesPerElem))
  if (!CheckedHandleSizeof(Designator.MostDerivedType, BytesPerElem))
    return false;

  // According to the GCC documentation, we want the size of the subobject
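This evaluator work backs __builtin_object_size; a small example of the two query modes involved (a Clang/GCC builtin; results shown as typically folded for a local of known type):

#include <cstdio>

struct Buf { char Data[16]; };

int main() {
  Buf B;
  // Type=1 asks for an upper bound on the enclosing subobject, which is what
  // determineEndOffset computes; Type=0 falls back to the whole object.
  printf("%zu\n", __builtin_object_size(B.Data, 1)); // 16
  printf("%zu\n", __builtin_object_size(&B, 0));     // 16
}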
@ -1659,3 +1659,64 @@ OMPTargetTeamsDistributeParallelForDirective::CreateEmpty(const ASTContext &C,
  return new (Mem)
      OMPTargetTeamsDistributeParallelForDirective(CollapsedNum, NumClauses);
}

OMPTargetTeamsDistributeParallelForSimdDirective *
OMPTargetTeamsDistributeParallelForSimdDirective::Create(
    const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
    unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
    const HelperExprs &Exprs) {
  auto Size =
      llvm::alignTo(sizeof(OMPTargetTeamsDistributeParallelForSimdDirective),
                    alignof(OMPClause *));
  void *Mem = C.Allocate(
      Size + sizeof(OMPClause *) * Clauses.size() +
      sizeof(Stmt *) *
          numLoopChildren(CollapsedNum,
                          OMPD_target_teams_distribute_parallel_for_simd));
  OMPTargetTeamsDistributeParallelForSimdDirective *Dir =
      new (Mem) OMPTargetTeamsDistributeParallelForSimdDirective(
          StartLoc, EndLoc, CollapsedNum, Clauses.size());
  Dir->setClauses(Clauses);
  Dir->setAssociatedStmt(AssociatedStmt);
  Dir->setIterationVariable(Exprs.IterationVarRef);
  Dir->setLastIteration(Exprs.LastIteration);
  Dir->setCalcLastIteration(Exprs.CalcLastIteration);
  Dir->setPreCond(Exprs.PreCond);
  Dir->setCond(Exprs.Cond);
  Dir->setInit(Exprs.Init);
  Dir->setInc(Exprs.Inc);
  Dir->setIsLastIterVariable(Exprs.IL);
  Dir->setLowerBoundVariable(Exprs.LB);
  Dir->setUpperBoundVariable(Exprs.UB);
  Dir->setStrideVariable(Exprs.ST);
  Dir->setEnsureUpperBound(Exprs.EUB);
  Dir->setNextLowerBound(Exprs.NLB);
  Dir->setNextUpperBound(Exprs.NUB);
  Dir->setNumIterations(Exprs.NumIterations);
  Dir->setPrevLowerBoundVariable(Exprs.PrevLB);
  Dir->setPrevUpperBoundVariable(Exprs.PrevUB);
  Dir->setCounters(Exprs.Counters);
  Dir->setPrivateCounters(Exprs.PrivateCounters);
  Dir->setInits(Exprs.Inits);
  Dir->setUpdates(Exprs.Updates);
  Dir->setFinals(Exprs.Finals);
  Dir->setPreInits(Exprs.PreInits);
  return Dir;
}

OMPTargetTeamsDistributeParallelForSimdDirective *
OMPTargetTeamsDistributeParallelForSimdDirective::CreateEmpty(
    const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum,
    EmptyShell) {
  auto Size =
      llvm::alignTo(sizeof(OMPTargetTeamsDistributeParallelForSimdDirective),
                    alignof(OMPClause *));
  void *Mem = C.Allocate(
      Size + sizeof(OMPClause *) * NumClauses +
      sizeof(Stmt *) *
          numLoopChildren(CollapsedNum,
                          OMPD_target_teams_distribute_parallel_for_simd));
  return new (Mem) OMPTargetTeamsDistributeParallelForSimdDirective(
      CollapsedNum, NumClauses);
}
@ -1244,6 +1244,12 @@ void StmtPrinter::VisitOMPTargetTeamsDistributeParallelForDirective(
  PrintOMPExecutableDirective(Node);
}

void StmtPrinter::VisitOMPTargetTeamsDistributeParallelForSimdDirective(
    OMPTargetTeamsDistributeParallelForSimdDirective *Node) {
  Indent() << "#pragma omp target teams distribute parallel for simd ";
  PrintOMPExecutableDirective(Node);
}

//===----------------------------------------------------------------------===//
//  Expr printing methods.
//===----------------------------------------------------------------------===//

@ -763,6 +763,11 @@ void StmtProfiler::VisitOMPTargetTeamsDistributeParallelForDirective(
  VisitOMPLoopDirective(S);
}

void StmtProfiler::VisitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective *S) {
  VisitOMPLoopDirective(S);
}

void StmtProfiler::VisitExpr(const Expr *S) {
  VisitStmt(S);
}
@ -1690,15 +1690,19 @@ CFGBuilder::VisitLogicalOperator(BinaryOperator *B,
    // we have been provided.
    ExitBlock = RHSBlock = createBlock(false);

    // Even though KnownVal is only used in the else branch of the next
    // conditional, tryEvaluateBool performs additional checking on the
    // Expr, so it should be called unconditionally.
    TryResult KnownVal = tryEvaluateBool(RHS);
    if (!KnownVal.isKnown())
      KnownVal = tryEvaluateBool(B);

    if (!Term) {
      assert(TrueBlock == FalseBlock);
      addSuccessor(RHSBlock, TrueBlock);
    }
    else {
      RHSBlock->setTerminator(Term);
      TryResult KnownVal = tryEvaluateBool(RHS);
      if (!KnownVal.isKnown())
        KnownVal = tryEvaluateBool(B);
      addSuccessor(RHSBlock, TrueBlock, !KnownVal.isFalse());
      addSuccessor(RHSBlock, FalseBlock, !KnownVal.isTrue());
    }
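To see why the constant-folding matters, consider a condition whose RHS folds to a constant. A hypothetical illustration (assuming, as the calls above suggest, that addSuccessor's third argument marks the edge reachable):

// For `if (x && 0)`, tryEvaluateBool(RHS) yields a known `false`, so:
//   addSuccessor(RHSBlock, TrueBlock, !KnownVal.isFalse());  // unreachable edge
//   addSuccessor(RHSBlock, FalseBlock, !KnownVal.isTrue());  // reachable edge
// CFG-based analyses can then treat the then-branch as dead code.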
@ -685,6 +685,16 @@ bool clang::isAllowedClauseForDirective(OpenMPDirectiveKind DKind,
#define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(Name) \
  case OMPC_##Name: \
    return true;
#include "clang/Basic/OpenMPKinds.def"
    default:
      break;
    }
    break;
  case OMPD_target_teams_distribute_parallel_for_simd:
    switch (CKind) {
#define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(Name) \
  case OMPC_##Name: \
    return true;
#include "clang/Basic/OpenMPKinds.def"
    default:
      break;
@ -721,7 +731,8 @@ bool clang::isOpenMPLoopDirective(OpenMPDirectiveKind DKind) {
         DKind == OMPD_teams_distribute_parallel_for_simd ||
         DKind == OMPD_teams_distribute_parallel_for ||
         DKind == OMPD_target_teams_distribute ||
         DKind == OMPD_target_teams_distribute_parallel_for;
         DKind == OMPD_target_teams_distribute_parallel_for ||
         DKind == OMPD_target_teams_distribute_parallel_for_simd;
}

bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) {
@ -735,8 +746,8 @@ bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) {
         DKind == OMPD_target_parallel_for_simd ||
         DKind == OMPD_teams_distribute_parallel_for_simd ||
         DKind == OMPD_teams_distribute_parallel_for ||
         DKind == OMPD_target_teams_distribute_parallel_for;
  // TODO add next directives.
         DKind == OMPD_target_teams_distribute_parallel_for ||
         DKind == OMPD_target_teams_distribute_parallel_for_simd;
}

bool clang::isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind) {
@ -752,8 +763,8 @@ bool clang::isOpenMPParallelDirective(OpenMPDirectiveKind DKind) {
         DKind == OMPD_target_parallel_for_simd ||
         DKind == OMPD_teams_distribute_parallel_for ||
         DKind == OMPD_teams_distribute_parallel_for_simd ||
         DKind == OMPD_target_teams_distribute_parallel_for;
  // TODO add next directives.
         DKind == OMPD_target_teams_distribute_parallel_for ||
         DKind == OMPD_target_teams_distribute_parallel_for_simd;
}

bool clang::isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind) {
@ -761,7 +772,8 @@ bool clang::isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind) {
         DKind == OMPD_target_parallel_for ||
         DKind == OMPD_target_parallel_for_simd || DKind == OMPD_target_simd ||
         DKind == OMPD_target_teams || DKind == OMPD_target_teams_distribute ||
         DKind == OMPD_target_teams_distribute_parallel_for;
         DKind == OMPD_target_teams_distribute_parallel_for ||
         DKind == OMPD_target_teams_distribute_parallel_for_simd;
}

bool clang::isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind) {
@ -779,7 +791,8 @@ bool clang::isOpenMPNestingTeamsDirective(OpenMPDirectiveKind DKind) {
bool clang::isOpenMPTeamsDirective(OpenMPDirectiveKind DKind) {
  return isOpenMPNestingTeamsDirective(DKind) ||
         DKind == OMPD_target_teams || DKind == OMPD_target_teams_distribute ||
         DKind == OMPD_target_teams_distribute_parallel_for;
         DKind == OMPD_target_teams_distribute_parallel_for ||
         DKind == OMPD_target_teams_distribute_parallel_for_simd;
}

bool clang::isOpenMPSimdDirective(OpenMPDirectiveKind DKind) {
@ -788,8 +801,8 @@ bool clang::isOpenMPSimdDirective(OpenMPDirectiveKind DKind) {
         DKind == OMPD_distribute_parallel_for_simd ||
         DKind == OMPD_distribute_simd || DKind == OMPD_target_simd ||
         DKind == OMPD_teams_distribute_simd ||
         DKind == OMPD_teams_distribute_parallel_for_simd;
  // TODO add next directives.
         DKind == OMPD_teams_distribute_parallel_for_simd ||
         DKind == OMPD_target_teams_distribute_parallel_for_simd;
}

bool clang::isOpenMPNestingDistributeDirective(OpenMPDirectiveKind Kind) {
@ -805,7 +818,8 @@ bool clang::isOpenMPDistributeDirective(OpenMPDirectiveKind Kind) {
         Kind == OMPD_teams_distribute_parallel_for_simd ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute ||
         Kind == OMPD_target_teams_distribute_parallel_for;
         Kind == OMPD_target_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for_simd;
}

bool clang::isOpenMPPrivate(OpenMPClauseKind Kind) {
@ -830,5 +844,6 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) {
         Kind == OMPD_teams_distribute_parallel_for_simd ||
         Kind == OMPD_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute ||
         Kind == OMPD_target_teams_distribute_parallel_for;
         Kind == OMPD_target_teams_distribute_parallel_for ||
         Kind == OMPD_target_teams_distribute_parallel_for_simd;
}
@ -20,53 +20,64 @@
using namespace clang;
using namespace CodeGen;

/// \brief Get the GPU warp size.
llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXWarpSize(CodeGenFunction &CGF) {
namespace {
enum OpenMPRTLFunctionNVPTX {
  /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
  /// kmp_int32 thread_limit);
  OMPRTL_NVPTX__kmpc_kernel_init,
};

// NVPTX Address space
enum AddressSpace {
  AddressSpaceShared = 3,
};
} // namespace

/// Get the GPU warp size.
static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) {
  CGBuilderTy &Bld = CGF.Builder;
  return Bld.CreateCall(
      llvm::Intrinsic::getDeclaration(
          &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
          &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
      llvm::None, "nvptx_warp_size");
}

/// \brief Get the id of the current thread on the GPU.
llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXThreadID(CodeGenFunction &CGF) {
/// Get the id of the current thread on the GPU.
static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) {
  CGBuilderTy &Bld = CGF.Builder;
  return Bld.CreateCall(
      llvm::Intrinsic::getDeclaration(
          &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
          &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x),
      llvm::None, "nvptx_tid");
}

// \brief Get the maximum number of threads in a block of the GPU.
llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXNumThreads(CodeGenFunction &CGF) {
/// Get the maximum number of threads in a block of the GPU.
static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) {
  CGBuilderTy &Bld = CGF.Builder;
  return Bld.CreateCall(
      llvm::Intrinsic::getDeclaration(
          &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
          &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x),
      llvm::None, "nvptx_num_threads");
}

/// \brief Get barrier to synchronize all threads in a block.
void CGOpenMPRuntimeNVPTX::getNVPTXCTABarrier(CodeGenFunction &CGF) {
/// Get barrier to synchronize all threads in a block.
static void getNVPTXCTABarrier(CodeGenFunction &CGF) {
  CGBuilderTy &Bld = CGF.Builder;
  Bld.CreateCall(llvm::Intrinsic::getDeclaration(
      &CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
      &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
}

// \brief Synchronize all GPU threads in a block.
void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) {
  getNVPTXCTABarrier(CGF);
}
/// Synchronize all GPU threads in a block.
static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); }

/// \brief Get the thread id of the OMP master thread.
/// Get the thread id of the OMP master thread.
/// The master thread id is the first thread (lane) of the last warp in the
/// GPU block. Warp size is assumed to be some power of 2.
/// Thread id is 0 indexed.
/// E.g: If NumThreads is 33, master id is 32.
///      If NumThreads is 64, master id is 32.
///      If NumThreads is 1024, master id is 992.
llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) {
static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) {
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *NumThreads = getNVPTXNumThreads(CGF);

@ -77,19 +88,6 @@ llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) {
                       Bld.CreateNot(Mask), "master_tid");
}

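In scalar terms, the master-thread computation above reduces to the following sketch (the hunk elides the lines that build Mask from the warp size, so the Mask definition here is an assumption):

// Assuming Mask = WarpSize - 1, with WarpSize a power of 2:
unsigned masterThreadID(unsigned NumThreads, unsigned WarpSize) {
  unsigned Mask = WarpSize - 1;
  // Round (NumThreads - 1) down to the start of its warp.
  return (NumThreads - 1) & ~Mask; // 33 -> 32, 64 -> 32, 1024 -> 992 (WarpSize 32)
}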
namespace {
enum OpenMPRTLFunctionNVPTX {
  /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
  /// kmp_int32 thread_limit);
  OMPRTL_NVPTX__kmpc_kernel_init,
};

// NVPTX Address space
enum ADDRESS_SPACE {
  ADDRESS_SPACE_SHARED = 3,
};
} // namespace

CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
    CodeGenModule &CGM)
    : WorkerFn(nullptr), CGFI(nullptr) {
@ -119,14 +117,14 @@ void CGOpenMPRuntimeNVPTX::initializeEnvironment() {
      CGM.getModule(), CGM.Int32Ty, /*isConstant=*/false,
      llvm::GlobalValue::CommonLinkage,
      llvm::Constant::getNullValue(CGM.Int32Ty), "__omp_num_threads", 0,
      llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED);
      llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared);
  ActiveWorkers->setAlignment(DL.getPrefTypeAlignment(CGM.Int32Ty));

  WorkID = new llvm::GlobalVariable(
      CGM.getModule(), CGM.Int64Ty, /*isConstant=*/false,
      llvm::GlobalValue::CommonLinkage,
      llvm::Constant::getNullValue(CGM.Int64Ty), "__tgt_work_id", 0,
      llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED);
      llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared);
  WorkID->setAlignment(DL.getPrefTypeAlignment(CGM.Int64Ty));
}
@ -49,38 +49,6 @@ public:
  void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);

private:
  //
  // NVPTX calls.
  //

  /// \brief Get the GPU warp size.
  llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF);

  /// \brief Get the id of the current thread on the GPU.
  llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF);

  // \brief Get the maximum number of threads in a block of the GPU.
  llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF);

  /// \brief Get barrier to synchronize all threads in a block.
  void getNVPTXCTABarrier(CodeGenFunction &CGF);

  // \brief Synchronize all GPU threads in a block.
  void syncCTAThreads(CodeGenFunction &CGF);

  //
  // OMP calls.
  //

  /// \brief Get the thread id of the OMP master thread.
  /// The master thread id is the first thread (lane) of the last warp in the
  /// GPU block. Warp size is assumed to be some power of 2.
  /// Thread id is 0 indexed.
  /// E.g: If NumThreads is 33, master id is 32.
  ///      If NumThreads is 64, master id is 32.
  ///      If NumThreads is 1024, master id is 992.
  llvm::Value *getMasterThreadID(CodeGenFunction &CGF);

  //
  // Private state and methods.
  //
@ -326,6 +326,10 @@ void CodeGenFunction::EmitStmt(const Stmt *S) {
    EmitOMPTargetTeamsDistributeParallelForDirective(
        cast<OMPTargetTeamsDistributeParallelForDirective>(*S));
    break;
  case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass:
    EmitOMPTargetTeamsDistributeParallelForSimdDirective(
        cast<OMPTargetTeamsDistributeParallelForSimdDirective>(*S));
    break;
  }
}
@ -2032,6 +2032,16 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
      });
}

void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
  CGM.getOpenMPRuntime().emitInlinedDirective(
      *this, OMPD_target_teams_distribute_parallel_for_simd,
      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
        CGF.EmitStmt(
            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
      });
}

/// \brief Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
@ -2760,6 +2770,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {

  auto &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause = false;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
@ -2793,6 +2804,16 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
    EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    OMPPrivateScope LoopScope(*this);
    if (EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of
      // lastprivate variables.
      CGM.getOpenMPRuntime().emitBarrierCall(
          *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    EmitOMPPrivateClause(S, LoopScope);
    HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
    EmitOMPPrivateLoopCounters(S, LoopScope);
    (void)LoopScope.Privatize();

@ -2849,6 +2870,13 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
                          LB.getAddress(), UB.getAddress(), ST.getAddress(),
                          IL.getAddress(), Chunk);
    }

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivateClause)
      EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          Builder.CreateIsNotNull(
              EmitLoadOfScalar(IL, S.getLocStart())));
  }

  // We're now done with the loop, so jump to the continuation block.
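The two additions correspond to source like the following illustrative fragment (not from the patch): the implicit barrier guards the copy-in of `scale` before any team runs the loop, and the final block copies `last` out of the sequentially-last iteration.

// Illustrative only: assumes an enclosing `target teams` region, as the
// OpenMP spec requires for a bare `distribute`.
void scaleAll(int n, float scale, const float *in, float *out) {
  float last = 0.0f;
#pragma omp distribute firstprivate(scale) lastprivate(last)
  for (int i = 0; i < n; ++i) {
    out[i] = scale * in[i]; // every team needs `scale` initialized first
    last = out[i];          // value from the last iteration survives the loop
  }
  (void)last;
}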
@ -1049,6 +1049,19 @@ QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD,
  return ResTy;
}

static bool
shouldUseUndefinedBehaviorReturnOptimization(const FunctionDecl *FD,
                                             const ASTContext &Context) {
  QualType T = FD->getReturnType();
  // Avoid the optimization for functions that return a record type with a
  // trivial destructor or another trivially copyable type.
  if (const RecordType *RT = T.getCanonicalType()->getAs<RecordType>()) {
    if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RT->getDecl()))
      return !ClassDecl->hasTrivialDestructor();
  }
  return !T.isTriviallyCopyableType(Context);
}

void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
                                   const CGFunctionInfo &FnInfo) {
  const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
@ -1127,17 +1140,23 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
  // function call is used by the caller, the behavior is undefined.
  if (getLangOpts().CPlusPlus && !FD->hasImplicitReturnZero() && !SawAsmBlock &&
      !FD->getReturnType()->isVoidType() && Builder.GetInsertBlock()) {
    bool ShouldEmitUnreachable =
        CGM.getCodeGenOpts().StrictReturn ||
        shouldUseUndefinedBehaviorReturnOptimization(FD, getContext());
    if (SanOpts.has(SanitizerKind::Return)) {
      SanitizerScope SanScope(this);
      llvm::Value *IsFalse = Builder.getFalse();
      EmitCheck(std::make_pair(IsFalse, SanitizerKind::Return),
                SanitizerHandler::MissingReturn,
                EmitCheckSourceLocation(FD->getLocation()), None);
    } else if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
      EmitTrapCall(llvm::Intrinsic::trap);
    } else if (ShouldEmitUnreachable) {
      if (CGM.getCodeGenOpts().OptimizationLevel == 0)
        EmitTrapCall(llvm::Intrinsic::trap);
    }
    if (SanOpts.has(SanitizerKind::Return) || ShouldEmitUnreachable) {
      Builder.CreateUnreachable();
      Builder.ClearInsertionPoint();
    }
    Builder.CreateUnreachable();
    Builder.ClearInsertionPoint();
  }

  // Emit the standard function epilogue.
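The new flag's effect is easiest to see on a function that can fall off its end; a hypothetical example:

// Using the result of f(false) is undefined behavior. With -fstrict-return
// (the default) clang keeps emitting `unreachable` (preceded by a trap at
// -O0) on the fall-through path. With -fno-strict-return, the helper above
// suppresses that for trivially copyable return types such as `int`,
// tolerating legacy code at some optimization cost.
int f(bool b) {
  if (b)
    return 1;
} // warning: control may reach end of non-void function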
@ -2699,6 +2699,8 @@ public:
      const OMPTargetTeamsDistributeDirective &S);
  void EmitOMPTargetTeamsDistributeParallelForDirective(
      const OMPTargetTeamsDistributeParallelForDirective &S);
  void EmitOMPTargetTeamsDistributeParallelForSimdDirective(
      const OMPTargetTeamsDistributeParallelForSimdDirective &S);

  /// Emit outlined function for the target directive.
  static std::pair<llvm::Function * /*OutlinedFn*/,
@ -3569,7 +3571,7 @@ public:

  // If we still have any arguments, emit them using the type of the argument.
  for (auto *A : llvm::make_range(Arg, ArgRange.end()))
    ArgTypes.push_back(getVarArgType(A));
    ArgTypes.push_back(CallArgTypeInfo ? getVarArgType(A) : A->getType());

  EmitCallArgs(Args, ArgTypes, ArgRange, CalleeDecl, ParamsToSkip, Order);
}
@ -2235,6 +2235,15 @@ static void AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args,
                   UseSeparateSections)) {
    CmdArgs.push_back("-plugin-opt=-data-sections");
  }

  if (Arg *A = Args.getLastArg(options::OPT_fprofile_sample_use_EQ)) {
    StringRef FName = A->getValue();
    if (!llvm::sys::fs::exists(FName))
      D.Diag(diag::err_drv_no_such_file) << FName;
    else
      CmdArgs.push_back(
          Args.MakeArgString(Twine("-plugin-opt=sample-profile=") + FName));
  }
}

/// This is a helper function for validating the optional refinement step
@ -3058,6 +3067,10 @@ static void CollectArgsForIntegratedAssembler(Compilation &C,
        continue;
      }

      if (C.getDefaultToolChain().getTriple().isOSBinFormatCOFF() &&
          Value == "-mbig-obj")
        continue; // LLVM handles bigobj automatically

      switch (C.getDefaultToolChain().getArch()) {
      default:
        break;
@ -4453,6 +4466,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
  if (Args.hasFlag(options::OPT_fstrict_enums, options::OPT_fno_strict_enums,
                   false))
    CmdArgs.push_back("-fstrict-enums");
  if (!Args.hasFlag(options::OPT_fstrict_return, options::OPT_fno_strict_return,
                    true))
    CmdArgs.push_back("-fno-strict-return");
  if (Args.hasFlag(options::OPT_fstrict_vtable_pointers,
                   options::OPT_fno_strict_vtable_pointers,
                   false))
@ -638,6 +638,9 @@ FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
    ChromiumStyle.BreakAfterJavaFieldAnnotations = true;
    ChromiumStyle.ContinuationIndentWidth = 8;
    ChromiumStyle.IndentWidth = 4;
  } else if (Language == FormatStyle::LK_JavaScript) {
    ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
    ChromiumStyle.AllowShortLoopsOnASingleLine = false;
  } else {
    ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
    ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
@ -1255,10 +1255,13 @@ void UnwrappedLineParser::tryToParseJSFunction() {
  if (FormatTok->is(tok::l_brace))
    tryToParseBracedList();
  else
    while (FormatTok->isNot(tok::l_brace) && !eof())
    while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
      nextToken();
  }

  if (FormatTok->is(tok::semi))
    return;

  parseChildBlock();
}
@ -370,6 +370,26 @@ void DeclContextPrinter::PrintDeclContext(const DeclContext* DC,
    break;
  }

  case Decl::ClassTemplateSpecialization: {
    const auto *CTSD = cast<ClassTemplateSpecializationDecl>(DC);
    if (CTSD->isCompleteDefinition())
      Out << "[class template specialization] ";
    else
      Out << "<class template specialization> ";
    Out << *CTSD;
    break;
  }

  case Decl::ClassTemplatePartialSpecialization: {
    const auto *CTPSD = cast<ClassTemplatePartialSpecializationDecl>(DC);
    if (CTPSD->isCompleteDefinition())
      Out << "[class template partial specialization] ";
    else
      Out << "<class template partial specialization> ";
    Out << *CTPSD;
    break;
  }

  default:
    llvm_unreachable("a decl that inherits DeclContext isn't handled");
  }
@ -400,7 +420,8 @@ void DeclContextPrinter::PrintDeclContext(const DeclContext* DC,
    case Decl::CXXConstructor:
    case Decl::CXXDestructor:
    case Decl::CXXConversion:
    {
    case Decl::ClassTemplateSpecialization:
    case Decl::ClassTemplatePartialSpecialization: {
      DeclContext* DC = cast<DeclContext>(I);
      PrintDeclContext(DC, Indentation+2);
      break;
@ -478,6 +499,37 @@ void DeclContextPrinter::PrintDeclContext(const DeclContext* DC,
      Out << "<omp threadprivate> " << '"' << I << "\"\n";
      break;
    }
    case Decl::Friend: {
      Out << "<friend>";
      if (const NamedDecl *ND = cast<FriendDecl>(I)->getFriendDecl())
        Out << ' ' << *ND;
      Out << "\n";
      break;
    }
    case Decl::Using: {
      Out << "<using> " << *cast<UsingDecl>(I) << "\n";
      break;
    }
    case Decl::UsingShadow: {
      Out << "<using shadow> " << *cast<UsingShadowDecl>(I) << "\n";
      break;
    }
    case Decl::Empty: {
      Out << "<empty>\n";
      break;
    }
    case Decl::AccessSpec: {
      Out << "<access specifier>\n";
      break;
    }
    case Decl::VarTemplate: {
      Out << "<var template> " << *cast<VarTemplateDecl>(I) << "\n";
      break;
    }
    case Decl::StaticAssert: {
      Out << "<static assert>\n";
      break;
    }
    default:
      Out << "DeclKind: " << DK << '"' << I << "\"\n";
      llvm_unreachable("decl unhandled");
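A hypothetical translation unit exercising the newly handled printer cases (the tags in the comments reflect the strings emitted above; exact output formatting is not guaranteed):

template <typename T> struct S {};
template <> struct S<int> {};            // [class template specialization]
template <typename T> struct S<T *> {};  // [class template partial specialization]

struct A {
  friend class B;                        // <friend> B
  static_assert(sizeof(int) >= 2, "");   // <static assert>
public:                                  // <access specifier>
  ;                                      // <empty>
};

namespace N { int x; }
using N::x;                              // <using> x, plus a <using shadow> entry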
@ -602,6 +602,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
  Opts.NoDwarfDirectoryAsm = Args.hasArg(OPT_fno_dwarf_directory_asm);
  Opts.SoftFloat = Args.hasArg(OPT_msoft_float);
  Opts.StrictEnums = Args.hasArg(OPT_fstrict_enums);
  Opts.StrictReturn = !Args.hasArg(OPT_fno_strict_return);
  Opts.StrictVTablePointers = Args.hasArg(OPT_fstrict_vtable_pointers);
  Opts.UnsafeFPMath = Args.hasArg(OPT_menable_unsafe_fp_math) ||
                      Args.hasArg(OPT_cl_unsafe_math_optimizations) ||