diff --git a/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h b/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h index 559fb40773aa..a77cf04ea4d1 100644 --- a/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h +++ b/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h @@ -21,8 +21,8 @@ // class MyClass : public RefCountedBase {}; // // void foo() { -// // Objects that inherit from RefCountedBase should always be instantiated -// // on the heap, never on the stack. +// // Constructing an IntrusiveRefCntPtr increases the pointee's refcount by +// // 1 (from 0 in this case). // IntrusiveRefCntPtr Ptr1(new MyClass()); // // // Copying an IntrusiveRefCntPtr increases the pointee's refcount by 1. @@ -68,9 +68,6 @@ namespace llvm { /// calls to Release() and Retain(), which increment and decrement the object's /// refcount, respectively. When a Release() call decrements the refcount to 0, /// the object deletes itself. -/// -/// Objects that inherit from RefCountedBase should always be allocated with -/// operator new. template class RefCountedBase { mutable unsigned RefCount = 0; diff --git a/contrib/llvm/include/llvm/ADT/PriorityWorklist.h b/contrib/llvm/include/llvm/ADT/PriorityWorklist.h index c0b4709e98f8..3198dd438700 100644 --- a/contrib/llvm/include/llvm/ADT/PriorityWorklist.h +++ b/contrib/llvm/include/llvm/ADT/PriorityWorklist.h @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include @@ -107,6 +108,39 @@ public: return false; } + /// Insert a sequence of new elements into the PriorityWorklist. + template + typename std::enable_if::value>::type + insert(SequenceT &&Input) { + if (std::begin(Input) == std::end(Input)) + // Nothing to do for an empty input sequence. + return; + + // First pull the input sequence into the vector as a bulk append + // operation. + ptrdiff_t StartIndex = V.size(); + V.insert(V.end(), std::begin(Input), std::end(Input)); + // Now walk backwards fixing up the index map and deleting any duplicates. + for (ptrdiff_t i = V.size() - 1; i >= StartIndex; --i) { + auto InsertResult = M.insert({V[i], i}); + if (InsertResult.second) + continue; + + // If the existing index is before this insert's start, nuke that one and + // move it up. + ptrdiff_t &Index = InsertResult.first->second; + if (Index < StartIndex) { + V[Index] = T(); + Index = i; + continue; + } + + // Otherwise the existing one comes first so just clear out the value in + // this slot. + V[i] = T(); + } + } + /// Remove the last element of the PriorityWorklist. void pop_back() { assert(!empty() && "Cannot remove an element when empty!"); @@ -169,6 +203,11 @@ public: return true; } + /// Reverse the items in the PriorityWorklist. + /// + /// This does an in-place reversal. Other kinds of reverse aren't easy to + /// support in the face of the worklist semantics. + /// Completely clear the PriorityWorklist void clear() { M.clear(); diff --git a/contrib/llvm/include/llvm/Analysis/Loads.h b/contrib/llvm/include/llvm/Analysis/Loads.h index 139bf3c2116f..e167f36219d2 100644 --- a/contrib/llvm/include/llvm/Analysis/Loads.h +++ b/contrib/llvm/include/llvm/Analysis/Loads.h @@ -23,10 +23,9 @@ namespace llvm { class DataLayout; class MDNode; -/// isDereferenceablePointer - Return true if this is always a dereferenceable -/// pointer. 
If the context instruction is specified perform context-sensitive -/// analysis and return true if the pointer is dereferenceable at the -/// specified instruction. +/// Return true if this is always a dereferenceable pointer. If the context +/// instruction is specified perform context-sensitive analysis and return true +/// if the pointer is dereferenceable at the specified instruction. bool isDereferenceablePointer(const Value *V, const DataLayout &DL, const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr); @@ -40,8 +39,7 @@ bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr); -/// isSafeToLoadUnconditionally - Return true if we know that executing a load -/// from this value cannot trap. +/// Return true if we know that executing a load from this value cannot trap. /// /// If DT and ScanFrom are specified this method performs context-sensitive /// analysis and returns true if it is safe to load immediately before ScanFrom. @@ -54,12 +52,12 @@ bool isSafeToLoadUnconditionally(Value *V, unsigned Align, Instruction *ScanFrom = nullptr, const DominatorTree *DT = nullptr); -/// DefMaxInstsToScan - the default number of maximum instructions -/// to scan in the block, used by FindAvailableLoadedValue(). +/// The default number of maximum instructions to scan in the block, used by +/// FindAvailableLoadedValue(). extern cl::opt DefMaxInstsToScan; -/// \brief Scan backwards to see if we have the value of the given load -/// available locally within a small number of instructions. +/// Scan backwards to see if we have the value of the given load available +/// locally within a small number of instructions. /// /// You can use this function to scan across multiple blocks: after you call /// this function, if ScanFrom points at the beginning of the block, it's safe diff --git a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h index c1be46ddd7b5..be8822df3dba 100644 --- a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -208,6 +208,8 @@ public: SledKind Kind; bool AlwaysInstrument; const class Function *Fn; + + void emit(int, MCStreamer *, const MCSymbol *) const; }; // All the sleds to be emitted. @@ -216,6 +218,9 @@ public: // Helper function to record a given XRay sled. void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind); + /// Emit a table with all XRay instrumentation points. + void emitXRayTable(); + //===------------------------------------------------------------------===// // MachineFunctionPass Implementation. //===------------------------------------------------------------------===// diff --git a/contrib/llvm/include/llvm/CodeGen/MachineDominators.h b/contrib/llvm/include/llvm/CodeGen/MachineDominators.h index 76e1df89169e..21ecef587aa5 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineDominators.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineDominators.h @@ -59,6 +59,9 @@ class MachineDominatorTree : public MachineFunctionPass { /// such as BB == elt.NewBB. mutable SmallSet NewBBs; + /// The DominatorTreeBase that is used to compute a normal dominator tree + DominatorTreeBase* DT; + /// \brief Apply all the recorded critical edges to the DT. /// This updates the underlying DT information in a way that uses /// the fast query path of DT as much as possible. 
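The PriorityWorklist::insert overload added earlier in this patch relies on a bulk-append-then-backwards-fixup strategy. A minimal stand-alone sketch of the same algorithm, using std::vector and std::unordered_map in place of SmallVector and DenseMap (BulkWorklist and insertRange are names local to this sketch, and T() is assumed to be a reserved "dead slot" value, as in the real class):

#include <cstddef>
#include <iterator>
#include <unordered_map>
#include <vector>

template <typename T> class BulkWorklist {
  std::vector<T> V;                        // insertion-ordered storage
  std::unordered_map<T, std::ptrdiff_t> M; // value -> index of its live slot

public:
  template <typename Range> void insertRange(const Range &Input) {
    // Bulk-append first, then walk backwards deduplicating, so the newest
    // occurrence of each value keeps the highest (latest) priority.
    std::ptrdiff_t StartIndex = V.size();
    V.insert(V.end(), std::begin(Input), std::end(Input));
    for (std::ptrdiff_t i = V.size() - 1; i >= StartIndex; --i) {
      auto InsertResult = M.insert({V[i], i});
      if (InsertResult.second)
        continue;               // first sighting of this value
      std::ptrdiff_t &Index = InsertResult.first->second;
      if (Index < StartIndex) { // older copy predates this insert: kill it
        V[Index] = T();
        Index = i;
        continue;
      }
      V[i] = T();               // a later duplicate already claimed the slot
    }
  }
};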
@@ -68,7 +71,6 @@ class MachineDominatorTree : public MachineFunctionPass { public: static char ID; // Pass ID, replacement for typeid - DominatorTreeBase* DT; MachineDominatorTree(); diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index ca9a6c822876..878f1c76ebf6 100644 --- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -116,12 +116,12 @@ public: // An unsigned integer indicating the identity of the source file // corresponding to a machine instruction. uint16_t File; - // An unsigned integer whose value encodes the applicable instruction set - // architecture for the current instruction. - uint8_t Isa; // An unsigned integer representing the DWARF path discriminator value // for this location. uint32_t Discriminator; + // An unsigned integer whose value encodes the applicable instruction set + // architecture for the current instruction. + uint8_t Isa; // A boolean indicating that the current instruction is the beginning of a // statement. uint8_t IsStmt:1, diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 078959ce15d0..07d5b5ea40dc 100644 --- a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -104,6 +104,13 @@ def int_amdgcn_dispatch_id : // Instruction Intrinsics //===----------------------------------------------------------------------===// +// The first parameter is s_sendmsg immediate (i16), +// the second one is copied to m0 +def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">, + Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>; +def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">, + Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>; + def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">, Intrinsic<[], [], [IntrConvergent]>; diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td index 3a496cb6645c..85966af9c820 100644 --- a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td @@ -2063,130 +2063,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; } -// Vector extract and insert -let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
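These extract/insert intrinsic definitions can be deleted because the AutoUpgrade.cpp hunk later in this patch (the "Added in 4.0" entries) rewrites calls to them as plain shufflevectors plus a select for the mask. A stand-alone sketch of the lane-index math that upgrade uses, where extractLaneMask is a hypothetical helper (for example, lane Imm = 1 of a 512-bit vector viewed as four 128-bit float lanes yields indices {4, 5, 6, 7}):

#include <cstdint>
#include <vector>

// DstNumElts: elements in the extracted subvector; SrcNumElts: elements in
// the source vector. Mirrors the shuffle mask built in UpgradeIntrinsicCall.
std::vector<uint32_t> extractLaneMask(unsigned DstNumElts, unsigned SrcNumElts,
                                      unsigned Imm) {
  Imm %= SrcNumElts / DstNumElts; // hardware ignores the high immediate bits
  std::vector<uint32_t> Idxs(DstNumElts);
  for (unsigned i = 0; i != DstNumElts; ++i)
    Idxs[i] = i + Imm * DstNumElts;
  return Idxs;
}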
- def int_x86_avx512_mask_vextractf32x4_512 : - GCCBuiltin<"__builtin_ia32_extractf32x4_mask">, - Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i32_ty, - llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextracti32x4_512 : - GCCBuiltin<"__builtin_ia32_extracti32x4_mask">, - Intrinsic<[llvm_v4i32_ty], [llvm_v16i32_ty, llvm_i32_ty, - llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextractf32x4_256 : - GCCBuiltin<"__builtin_ia32_extractf32x4_256_mask">, - Intrinsic<[llvm_v4f32_ty], [llvm_v8f32_ty, llvm_i32_ty, - llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextracti32x4_256 : - GCCBuiltin<"__builtin_ia32_extracti32x4_256_mask">, - Intrinsic<[llvm_v4i32_ty], [llvm_v8i32_ty, llvm_i32_ty, - llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextractf64x2_256 : - GCCBuiltin<"__builtin_ia32_extractf64x2_256_mask">, - Intrinsic<[llvm_v2f64_ty], [llvm_v4f64_ty, llvm_i32_ty, - llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextracti64x2_256 : - GCCBuiltin<"__builtin_ia32_extracti64x2_256_mask">, - Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty, llvm_i32_ty, - llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextractf64x2_512 : - GCCBuiltin<"__builtin_ia32_extractf64x2_512_mask">, - Intrinsic<[llvm_v2f64_ty], [llvm_v8f64_ty, llvm_i32_ty, - llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextracti64x2_512 : - GCCBuiltin<"__builtin_ia32_extracti64x2_512_mask">, - Intrinsic<[llvm_v2i64_ty], [llvm_v8i64_ty, llvm_i32_ty, - llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextractf32x8_512 : - GCCBuiltin<"__builtin_ia32_extractf32x8_mask">, - Intrinsic<[llvm_v8f32_ty], [llvm_v16f32_ty, llvm_i32_ty, - llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextracti32x8_512 : - GCCBuiltin<"__builtin_ia32_extracti32x8_mask">, - Intrinsic<[llvm_v8i32_ty],[llvm_v16i32_ty, llvm_i32_ty, - llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextractf64x4_512 : - GCCBuiltin<"__builtin_ia32_extractf64x4_mask">, - Intrinsic<[llvm_v4f64_ty], [llvm_v8f64_ty, llvm_i32_ty, - llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_vextracti64x4_512 : - GCCBuiltin<"__builtin_ia32_extracti64x4_mask">, - Intrinsic<[llvm_v4i64_ty], [llvm_v8i64_ty, llvm_i32_ty, - llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_insertf32x4_256 : - GCCBuiltin<"__builtin_ia32_insertf32x4_256_mask">, - Intrinsic<[llvm_v8f32_ty], - [llvm_v8f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_insertf32x4_512 : - GCCBuiltin<"__builtin_ia32_insertf32x4_mask">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v16f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_insertf32x8_512 : - GCCBuiltin<"__builtin_ia32_insertf32x8_mask">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v16f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_insertf64x2_256 : - GCCBuiltin<"__builtin_ia32_insertf64x2_256_mask">, - Intrinsic<[llvm_v4f64_ty], - [llvm_v4f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_insertf64x2_512 : - GCCBuiltin<"__builtin_ia32_insertf64x2_512_mask">, - Intrinsic<[llvm_v8f64_ty], - [llvm_v8f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_insertf64x4_512 : - 
GCCBuiltin<"__builtin_ia32_insertf64x4_mask">, - Intrinsic<[llvm_v8f64_ty], - [llvm_v8f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_inserti32x4_256 : - GCCBuiltin<"__builtin_ia32_inserti32x4_256_mask">, - Intrinsic<[llvm_v8i32_ty], - [llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_inserti32x4_512 : - GCCBuiltin<"__builtin_ia32_inserti32x4_mask">, - Intrinsic<[llvm_v16i32_ty], - [llvm_v16i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_inserti32x8_512 : - GCCBuiltin<"__builtin_ia32_inserti32x8_mask">, - Intrinsic<[llvm_v16i32_ty], - [llvm_v16i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_inserti64x2_256 : - GCCBuiltin<"__builtin_ia32_inserti64x2_256_mask">, - Intrinsic<[llvm_v4i64_ty], - [llvm_v4i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_inserti64x2_512 : - GCCBuiltin<"__builtin_ia32_inserti64x2_512_mask">, - Intrinsic<[llvm_v8i64_ty], - [llvm_v8i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_inserti64x4_512 : - GCCBuiltin<"__builtin_ia32_inserti64x4_mask">, - Intrinsic<[llvm_v8i64_ty], - [llvm_v8i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrNoMem]>; -} - // Conditional load ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_maskload_d : GCCBuiltin<"__builtin_ia32_maskloadd">, diff --git a/contrib/llvm/include/llvm/Support/FileSystem.h b/contrib/llvm/include/llvm/Support/FileSystem.h index 9d8d8c3ffb5c..347f21108913 100644 --- a/contrib/llvm/include/llvm/Support/FileSystem.h +++ b/contrib/llvm/include/llvm/Support/FileSystem.h @@ -769,17 +769,13 @@ namespace detail { std::error_code directory_iterator_increment(DirIterState &); std::error_code directory_iterator_destruct(DirIterState &); - /// DirIterState - Keeps state for the directory_iterator. It is reference - /// counted in order to preserve InputIterator semantics on copy. - struct DirIterState : public RefCountedBase { - DirIterState() - : IterationHandle(0) {} - + /// Keeps state for the directory_iterator. + struct DirIterState { ~DirIterState() { directory_iterator_destruct(*this); } - intptr_t IterationHandle; + intptr_t IterationHandle = 0; directory_entry CurrentEntry; }; } // end namespace detail @@ -788,23 +784,23 @@ namespace detail { /// operator++ because we need an error_code. If it's really needed we can make /// it call report_fatal_error on error. class directory_iterator { - IntrusiveRefCntPtr State; + std::shared_ptr State; public: explicit directory_iterator(const Twine &path, std::error_code &ec) { - State = new detail::DirIterState; + State = std::make_shared(); SmallString<128> path_storage; ec = detail::directory_iterator_construct(*State, path.toStringRef(path_storage)); } explicit directory_iterator(const directory_entry &de, std::error_code &ec) { - State = new detail::DirIterState; + State = std::make_shared(); ec = detail::directory_iterator_construct(*State, de.path()); } /// Construct end iterator. - directory_iterator() : State(nullptr) {} + directory_iterator() = default; // No operator++ because we need error_code. 
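Since copies of a directory_iterator still share one DirIterState, swapping IntrusiveRefCntPtr for std::shared_ptr preserves the InputIterator copy semantics. A usage sketch of the iterator with the increment() member below (listDir is a name local to this sketch):

#include "llvm/ADT/Twine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <system_error>

// Print every entry in Dir, stopping on the first error.
void listDir(const llvm::Twine &Dir) {
  std::error_code EC;
  for (llvm::sys::fs::directory_iterator It(Dir, EC), End;
       It != End && !EC; It.increment(EC))
    llvm::outs() << It->path() << "\n";
}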
directory_iterator &increment(std::error_code &ec) { diff --git a/contrib/llvm/include/llvm/Support/YAMLTraits.h b/contrib/llvm/include/llvm/Support/YAMLTraits.h index 38acb36942bc..cbba9c08275a 100644 --- a/contrib/llvm/include/llvm/Support/YAMLTraits.h +++ b/contrib/llvm/include/llvm/Support/YAMLTraits.h @@ -209,6 +209,15 @@ struct DocumentListTraits { // static T::value_type& element(IO &io, T &seq, size_t index); }; +/// This class should be specialized by any type that needs to be converted +/// to/from a YAML mapping in the case where the names of the keys are not known +/// in advance, e.g. a string map. +template +struct CustomMappingTraits { + // static void inputOne(IO &io, StringRef key, T &elem); + // static void output(IO &io, T &elem); +}; + // Only used for better diagnostics of missing traits template struct MissingTrait; @@ -358,6 +367,23 @@ public: static bool const value = (sizeof(test>(nullptr)) == 1); }; +// Test if CustomMappingTraits is defined on type T. +template +struct has_CustomMappingTraits +{ + typedef void (*Signature_input)(IO &io, StringRef key, T &v); + + template + static char test(SameType*); + + template + static double test(...); + +public: + static bool const value = + (sizeof(test>(nullptr)) == 1); +}; + // has_FlowTraits will cause an error with some compilers because // it subclasses int. Using this wrapper only instantiates the // real has_FlowTraits only if the template type is a class. @@ -493,6 +519,7 @@ struct missingTraits !has_BlockScalarTraits::value && !has_MappingTraits::value && !has_SequenceTraits::value && + !has_CustomMappingTraits::value && !has_DocumentListTraits::value> {}; template @@ -531,6 +558,7 @@ public: virtual void endMapping() = 0; virtual bool preflightKey(const char*, bool, bool, bool &, void *&) = 0; virtual void postflightKey(void*) = 0; + virtual std::vector keys() = 0; virtual void beginFlowMapping() = 0; virtual void endFlowMapping() = 0; @@ -818,6 +846,21 @@ yamlize(IO &io, T &Val, bool, Context &Ctx) { } } +template +typename std::enable_if::value, void>::type +yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) { + if ( io.outputting() ) { + io.beginMapping(); + CustomMappingTraits::output(io, Val); + io.endMapping(); + } else { + io.beginMapping(); + for (StringRef key : io.keys()) + CustomMappingTraits::inputOne(io, key, Val); + io.endMapping(); + } +} + template typename std::enable_if::value, void>::type yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) { @@ -1074,6 +1117,7 @@ private: void endMapping() override; bool preflightKey(const char *, bool, bool, bool &, void *&) override; void postflightKey(void *) override; + std::vector keys() override; void beginFlowMapping() override; void endFlowMapping() override; unsigned beginSequence() override; @@ -1154,10 +1198,8 @@ private: typedef llvm::StringMap> NameToNode; - bool isValidKey(StringRef key); - NameToNode Mapping; - llvm::SmallVector ValidKeys; + llvm::SmallVector ValidKeys; }; class SequenceHNode : public HNode { @@ -1215,6 +1257,7 @@ public: void endMapping() override; bool preflightKey(const char *key, bool, bool, bool &, void *&) override; void postflightKey(void *) override; + std::vector keys() override; void beginFlowMapping() override; void endFlowMapping() override; unsigned beginSequence() override; @@ -1384,6 +1427,17 @@ operator>>(Input &In, T &Val) { return In; } +// Define non-member operator>> so that Input can stream in a string map. 
+template +inline +typename std::enable_if::value, Input &>::type +operator>>(Input &In, T &Val) { + EmptyContext Ctx; + if (In.setCurrentDocument()) + yamlize(In, Val, true, Ctx); + return In; +} + // Provide better error message about types missing a trait specialization template inline typename std::enable_if::value, @@ -1457,6 +1511,21 @@ operator<<(Output &Out, T &Val) { return Out; } +// Define non-member operator<< so that Output can stream out a string map. +template +inline +typename std::enable_if::value, Output &>::type +operator<<(Output &Out, T &Val) { + EmptyContext Ctx; + Out.beginDocuments(); + if (Out.preflightDocument(0)) { + yamlize(Out, Val, true, Ctx); + Out.postflightDocument(); + } + Out.endDocuments(); + return Out; +} + // Provide better error message about types missing a trait specialization template inline typename std::enable_if::value, @@ -1476,6 +1545,18 @@ template struct SequenceTraitsImpl { } }; +/// Implementation of CustomMappingTraits for std::map. +template struct StdMapStringCustomMappingTraitsImpl { + typedef std::map map_type; + static void inputOne(IO &io, StringRef key, map_type &v) { + io.mapRequired(key.str().c_str(), v[key]); + } + static void output(IO &io, map_type &v) { + for (auto &p : v) + io.mapRequired(p.first.c_str(), p.second); + } +}; + } // end namespace yaml } // end namespace llvm @@ -1530,4 +1611,15 @@ template struct SequenceTraitsImpl { } \ } +/// Utility for declaring that std::map should be considered +/// a YAML map. +#define LLVM_YAML_IS_STRING_MAP(_type) \ + namespace llvm { \ + namespace yaml { \ + template <> \ + struct CustomMappingTraits> \ + : public StdMapStringCustomMappingTraitsImpl<_type> {}; \ + } \ + } + #endif // LLVM_SUPPORT_YAMLTRAITS_H diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index 2a77baec6c36..073b4e6ab26a 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -2542,9 +2542,6 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, if (const ConstantFP *CFP = dyn_cast(V)) return !CFP->getValueAPF().isNegZero(); - // FIXME: Magic number! At the least, this should be given a name because it's - // used similarly in CannotBeOrderedLessThanZero(). A better fix may be to - // expose it as a parameter, so it can be used for testing / experimenting. if (Depth == MaxDepth) return false; // Limit search depth. @@ -2589,9 +2586,6 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V, if (const ConstantFP *CFP = dyn_cast(V)) return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero(); - // FIXME: Magic number! At the least, this should be given a name because it's - // used similarly in CannotBeNegativeZero(). A better fix may be to - // expose it as a parameter, so it can be used for testing / experimenting. if (Depth == MaxDepth) return false; // Limit search depth. diff --git a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index cd08268d47b5..5da421a79b7b 100644 --- a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -749,7 +749,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( // handles the case where this is type ODRed with a definition needed // by the importing module, in which case the existing definition is // used. 
- if (IsImporting && !ImportFullTypeDefinitions && + if (IsImporting && !ImportFullTypeDefinitions && Identifier && (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp index 0678bce449ed..79ecc4308fe7 100644 --- a/contrib/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -272,28 +272,10 @@ static const Value *getNoopInput(const Value *V, TLI.allowTruncateForTailCall(Op->getType(), I->getType())) { DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits()); NoopInput = Op; - } else if (isa(I)) { - // Look through call (skipping callee) - for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 1; - i != e; ++i) { - unsigned attrInd = i - I->op_begin() + 1; - if (cast(I)->paramHasAttr(attrInd, Attribute::Returned) && - isNoopBitcast((*i)->getType(), I->getType(), TLI)) { - NoopInput = *i; - break; - } - } - } else if (isa(I)) { - // Look through invoke (skipping BB, BB, Callee) - for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 3; - i != e; ++i) { - unsigned attrInd = i - I->op_begin() + 1; - if (cast(I)->paramHasAttr(attrInd, Attribute::Returned) && - isNoopBitcast((*i)->getType(), I->getType(), TLI)) { - NoopInput = *i; - break; - } - } + } else if (auto CS = ImmutableCallSite(I)) { + const Value *ReturnedOp = CS.getReturnedArgOperand(); + if (ReturnedOp && isNoopBitcast(ReturnedOp->getType(), I->getType(), TLI)) + NoopInput = ReturnedOp; } else if (const InsertValueInst *IVI = dyn_cast(V)) { // Value may come from either the aggregate or the scalar ArrayRef InsertLoc = IVI->getIndices(); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index de0a4f0befa1..5f15ac1d503b 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -37,6 +37,8 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" @@ -2610,6 +2612,61 @@ AsmPrinterHandler::~AsmPrinterHandler() {} void AsmPrinterHandler::markFunctionEnd() {} +// In the binary's "xray_instr_map" section, an array of these function entries +// describes each instrumentation point. When XRay patches your code, the index +// into this table will be given to your handler as a patch point identifier. 
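Reading the record layout off the emit() routine that follows, each table entry on a 64-bit target (Bytes == 8) occupies 32 bytes. The struct below is a descriptive sketch of that layout only; the field names are not from LLVM, and 32-bit targets shrink the two symbol values to 4 bytes each:

#include <cstdint>

struct XRaySledEntry64 {
  uint64_t Address;         // EmitSymbolValue(Sled, Bytes)
  uint64_t Function;        // EmitSymbolValue(CurrentFnSym, Bytes)
  uint8_t Kind;             // SledKind narrowed to one byte
  uint8_t AlwaysInstrument; // boolean flag
  uint8_t Padding[14];      // EmitZeros(2 * Bytes - 2)
};
static_assert(sizeof(XRaySledEntry64) == 32, "record is 4 * Bytes long");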
+void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out, + const MCSymbol *CurrentFnSym) const { + Out->EmitSymbolValue(Sled, Bytes); + Out->EmitSymbolValue(CurrentFnSym, Bytes); + auto Kind8 = static_cast(Kind); + Out->EmitBytes(StringRef(reinterpret_cast(&Kind8), 1)); + Out->EmitBytes( + StringRef(reinterpret_cast(&AlwaysInstrument), 1)); + Out->EmitZeros(2 * Bytes - 2); // Pad the previous two entries +} + +void AsmPrinter::emitXRayTable() { + if (Sleds.empty()) + return; + + auto PrevSection = OutStreamer->getCurrentSectionOnly(); + auto Fn = MF->getFunction(); + MCSection *Section = nullptr; + if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) { + if (Fn->hasComdat()) { + Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, + Fn->getComdat()->getName()); + } else { + Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC); + } + } else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) { + Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, + SectionKind::getReadOnlyWithRel()); + } else { + llvm_unreachable("Unsupported target"); + } + + // Before we switch over, we force a reference to a label inside the + // xray_instr_map section. Since this function is always called just + // before the function's end, we assume that this is happening after + // the last return instruction. + + auto WordSizeBytes = TM.getPointerSize(); + MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true); + OutStreamer->EmitCodeAlignment(16); + OutStreamer->EmitSymbolValue(Tmp, WordSizeBytes, false); + OutStreamer->SwitchSection(Section); + OutStreamer->EmitLabel(Tmp); + for (const auto &Sled : Sleds) + Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym); + + OutStreamer->SwitchSection(PrevSection); + Sleds.clear(); +} + void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind) { auto Fn = MI.getParent()->getParent()->getFunction(); diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index 422f2dc2f2fb..3d81184f774a 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -1124,7 +1124,7 @@ void HoistSpillHelper::rmRedundantSpills( // earlier spill with smaller SlotIndex. for (const auto CurrentSpill : Spills) { MachineBasicBlock *Block = CurrentSpill->getParent(); - MachineDomTreeNode *Node = MDT.DT->getNode(Block); + MachineDomTreeNode *Node = MDT.getBase().getNode(Block); MachineInstr *PrevSpill = SpillBBToSpill[Node]; if (PrevSpill) { SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill); @@ -1132,9 +1132,9 @@ void HoistSpillHelper::rmRedundantSpills( MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill; MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill; SpillsToRm.push_back(SpillToRm); - SpillBBToSpill[MDT.DT->getNode(Block)] = SpillToKeep; + SpillBBToSpill[MDT.getBase().getNode(Block)] = SpillToKeep; } else { - SpillBBToSpill[MDT.DT->getNode(Block)] = CurrentSpill; + SpillBBToSpill[MDT.getBase().getNode(Block)] = CurrentSpill; } } for (const auto SpillToRm : SpillsToRm) @@ -1209,7 +1209,7 @@ void HoistSpillHelper::getVisitOrders( // Sort the nodes in WorkSet in top-down order and save the nodes // in Orders. Orders will be used for hoisting in runHoistSpills. 
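The loop continuing below uses Orders itself as the BFS queue: it indexes into the vector while growing it, so every dominator-tree node is visited after its parent. A minimal stand-alone sketch of the pattern, omitting the WorkSet filtering (TreeNode and topDownOrder are local to this sketch):

#include <cstddef>
#include <vector>

struct TreeNode {
  std::vector<TreeNode *> Children;
};

std::vector<TreeNode *> topDownOrder(TreeNode *Root) {
  std::vector<TreeNode *> Orders{Root};
  // Orders doubles as the worklist; idx chases the growing end.
  for (std::size_t idx = 0; idx != Orders.size(); ++idx)
    for (TreeNode *Child : Orders[idx]->Children)
      Orders.push_back(Child);
  return Orders;
}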
unsigned idx = 0; - Orders.push_back(MDT.DT->getNode(Root)); + Orders.push_back(MDT.getBase().getNode(Root)); do { MachineDomTreeNode *Node = Orders[idx++]; const std::vector &Children = Node->getChildren(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b4b41c3d0011..4632484055d2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4277,7 +4277,8 @@ struct BaseIndexOffset { } /// Parses tree in Ptr for base, index, offset addresses. - static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) { + static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG, + int64_t PartialOffset = 0) { bool IsIndexSignExt = false; // Split up a folded GlobalAddress+Offset into its component parts. @@ -4286,7 +4287,7 @@ struct BaseIndexOffset { return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(), SDLoc(GA), GA->getValueType(0), - /*Offset=*/0, + /*Offset=*/PartialOffset, /*isTargetGA=*/false, GA->getTargetFlags()), SDValue(), @@ -4298,14 +4299,13 @@ struct BaseIndexOffset { // instruction, then it could be just the BASE or everything else we don't // know how to handle. Just use Ptr as BASE and give up. if (Ptr->getOpcode() != ISD::ADD) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); // We know that we have at least an ADD instruction. Try to pattern match // the simple case of BASE + OFFSET. if (isa(Ptr->getOperand(1))) { int64_t Offset = cast(Ptr->getOperand(1))->getSExtValue(); - return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, - IsIndexSignExt); + return match(Ptr->getOperand(0), DAG, Offset + PartialOffset); } // Inside a loop the current BASE pointer is calculated using an ADD and a @@ -4314,7 +4314,7 @@ struct BaseIndexOffset { // (i64 mul (i64 %induction_var) // (i64 %element_size))) if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); // Look at Base + Index + Offset cases. SDValue Base = Ptr->getOperand(0); @@ -4328,14 +4328,14 @@ struct BaseIndexOffset { // Either the case of Base + Index (no offset) or something else. if (IndexOffset->getOpcode() != ISD::ADD) - return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt); + return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt); // Now we have the case of Base + Index + offset. SDValue Index = IndexOffset->getOperand(0); SDValue Offset = IndexOffset->getOperand(1); if (!isa(Offset)) - return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt); // Ignore signextends. 
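The PartialOffset parameter threaded through match() above lets nested (Base + C1) + C2 chains collapse into a single accumulated offset via recursion, instead of stopping at the first constant. A stand-alone sketch of that accumulation, where Expr and decompose are hypothetical stand-ins for the SDValue machinery (decompose of (X + 8) + 4 yields {X, 12} rather than {X + 8, 4}):

#include <cstdint>
#include <utility>

struct Expr {
  const Expr *Base = nullptr; // non-null: this node is Base + Constant
  int64_t Constant = 0;
};

std::pair<const Expr *, int64_t> decompose(const Expr &E,
                                           int64_t PartialOffset = 0) {
  if (E.Base) // an ADD with a constant operand: recurse and accumulate
    return decompose(*E.Base, E.Constant + PartialOffset);
  return {&E, PartialOffset}; // opaque base: report what was folded so far
}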
if (Index->getOpcode() == ISD::SIGN_EXTEND) { @@ -4344,7 +4344,7 @@ struct BaseIndexOffset { } else IsIndexSignExt = false; int64_t Off = cast(Offset)->getSExtValue(); - return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); + return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt); } }; } // namespace diff --git a/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp index 324d07118704..57b5d85bb550 100644 --- a/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp +++ b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp @@ -88,15 +88,15 @@ void OProfileJITEventListener::NotifyObjectEmitted( // Use symbol info to iterate functions in the object. for (const std::pair &P : computeSymbolSizes(DebugObj)) { SymbolRef Sym = P.first; - if (Sym.getType() != SymbolRef::ST_Function) + if (!Sym.getType() || *Sym.getType() != SymbolRef::ST_Function) continue; - ErrorOr NameOrErr = Sym.getName(); - if (NameOrErr.getError()) + Expected NameOrErr = Sym.getName(); + if (!NameOrErr) continue; StringRef Name = *NameOrErr; - ErrorOr AddrOrErr = Sym.getAddress(); - if (AddrOrErr.getError()) + Expected AddrOrErr = Sym.getAddress(); + if (!AddrOrErr) continue; uint64_t Addr = *AddrOrErr; uint64_t Size = P.second; @@ -128,9 +128,9 @@ void OProfileJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) { for (symbol_iterator I = DebugObj.symbol_begin(), E = DebugObj.symbol_end(); I != E; ++I) { - if (I->getType() == SymbolRef::ST_Function) { - ErrorOr AddrOrErr = I->getAddress(); - if (AddrOrErr.getError()) + if (I->getType() && *I->getType() == SymbolRef::ST_Function) { + Expected AddrOrErr = I->getAddress(); + if (!AddrOrErr) continue; uint64_t Addr = *AddrOrErr; diff --git a/contrib/llvm/lib/IR/AutoUpgrade.cpp b/contrib/llvm/lib/IR/AutoUpgrade.cpp index 2d9d0f95efa5..a87b9bec1ed2 100644 --- a/contrib/llvm/lib/IR/AutoUpgrade.cpp +++ b/contrib/llvm/lib/IR/AutoUpgrade.cpp @@ -342,8 +342,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name == "avx.cvt.ps2.pd.256" || // Added in 3.9 Name.startswith("avx.vinsertf128.") || // Added in 3.7 Name == "avx2.vinserti128" || // Added in 3.7 + Name.startswith("avx512.mask.insert") || // Added in 4.0 Name.startswith("avx.vextractf128.") || // Added in 3.7 Name == "avx2.vextracti128" || // Added in 3.7 + Name.startswith("avx512.mask.vextract") || // Added in 4.0 Name.startswith("sse4a.movnt.") || // Added in 3.9 Name.startswith("avx.movnt.") || // Added in 3.2 Name.startswith("avx512.storent.") || // Added in 3.9 @@ -1150,21 +1152,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); } else if (IsX86 && (Name.startswith("avx.vinsertf128.") || - Name == "avx2.vinserti128")) { + Name == "avx2.vinserti128" || + Name.startswith("avx512.mask.insert"))) { Value *Op0 = CI->getArgOperand(0); Value *Op1 = CI->getArgOperand(1); unsigned Imm = cast(CI->getArgOperand(2))->getZExtValue(); - VectorType *VecTy = cast(CI->getType()); - unsigned NumElts = VecTy->getNumElements(); + unsigned DstNumElts = CI->getType()->getVectorNumElements(); + unsigned SrcNumElts = Op1->getType()->getVectorNumElements(); + unsigned Scale = DstNumElts / SrcNumElts; // Mask off the high bits of the immediate value; hardware ignores those. - Imm = Imm & 1; + Imm = Imm % Scale; - // Extend the second operand into a vector that is twice as big. 
+ // Extend the second operand into a vector the size of the destination. Value *UndefV = UndefValue::get(Op1->getType()); - SmallVector Idxs(NumElts); - for (unsigned i = 0; i != NumElts; ++i) + SmallVector Idxs(DstNumElts); + for (unsigned i = 0; i != SrcNumElts; ++i) Idxs[i] = i; + for (unsigned i = SrcNumElts; i != DstNumElts; ++i) + Idxs[i] = SrcNumElts; Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs); // Insert the second operand into the first operand. @@ -1178,33 +1184,41 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { // Imm = 1 // Imm = 0 - // The low half of the result is either the low half of the 1st operand - // or the low half of the 2nd operand (the inserted vector). - for (unsigned i = 0; i != NumElts / 2; ++i) - Idxs[i] = Imm ? i : (i + NumElts); - // The high half of the result is either the low half of the 2nd operand - // (the inserted vector) or the high half of the 1st operand. - for (unsigned i = NumElts / 2; i != NumElts; ++i) - Idxs[i] = Imm ? (i + NumElts / 2) : i; + // First fill with identify mask. + for (unsigned i = 0; i != DstNumElts; ++i) + Idxs[i] = i; + // Then replace the elements where we need to insert. + for (unsigned i = 0; i != SrcNumElts; ++i) + Idxs[i + Imm * SrcNumElts] = i + DstNumElts; Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs); + + // If the intrinsic has a mask operand, handle that. + if (CI->getNumArgOperands() == 5) + Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, + CI->getArgOperand(3)); } else if (IsX86 && (Name.startswith("avx.vextractf128.") || - Name == "avx2.vextracti128")) { + Name == "avx2.vextracti128" || + Name.startswith("avx512.mask.vextract"))) { Value *Op0 = CI->getArgOperand(0); unsigned Imm = cast(CI->getArgOperand(1))->getZExtValue(); - VectorType *VecTy = cast(CI->getType()); - unsigned NumElts = VecTy->getNumElements(); + unsigned DstNumElts = CI->getType()->getVectorNumElements(); + unsigned SrcNumElts = Op0->getType()->getVectorNumElements(); + unsigned Scale = SrcNumElts / DstNumElts; // Mask off the high bits of the immediate value; hardware ignores those. - Imm = Imm & 1; + Imm = Imm % Scale; - // Get indexes for either the high half or low half of the input vector. - SmallVector Idxs(NumElts); - for (unsigned i = 0; i != NumElts; ++i) { - Idxs[i] = Imm ? (i + NumElts) : i; + // Get indexes for the subvector of the input vector. + SmallVector Idxs(DstNumElts); + for (unsigned i = 0; i != DstNumElts; ++i) { + Idxs[i] = i + (Imm * DstNumElts); } + Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); - Value *UndefV = UndefValue::get(Op0->getType()); - Rep = Builder.CreateShuffleVector(Op0, UndefV, Idxs); + // If the intrinsic has a mask operand, handle that. + if (CI->getNumArgOperands() == 4) + Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, + CI->getArgOperand(2)); } else if (!IsX86 && Name == "stackprotectorcheck") { Rep = nullptr; } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") || diff --git a/contrib/llvm/lib/LTO/LTO.cpp b/contrib/llvm/lib/LTO/LTO.cpp index 7364f0e0cd31..42b3a344352b 100644 --- a/contrib/llvm/lib/LTO/LTO.cpp +++ b/contrib/llvm/lib/LTO/LTO.cpp @@ -891,23 +891,17 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, AddStream, Cache); - // Partition numbers for ThinLTO jobs start at 1 (see comments for - // GlobalResolution in LTO.h). 
Task numbers, however, start at - // ParallelCodeGenParallelismLevel if an LTO module is present, as tasks 0 - // through ParallelCodeGenParallelismLevel-1 are reserved for parallel code - // generation partitions. + // Task numbers start at ParallelCodeGenParallelismLevel if an LTO + // module is present, as tasks 0 through ParallelCodeGenParallelismLevel-1 + // are reserved for parallel code generation partitions. unsigned Task = HasRegularLTO ? RegularLTO.ParallelCodeGenParallelismLevel : 0; - unsigned Partition = 1; - for (auto &Mod : ThinLTO.ModuleMap) { if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first], ExportLists[Mod.first], ResolvedODR[Mod.first], ThinLTO.ModuleMap)) return E; - ++Task; - ++Partition; } return BackendProc->wait(); diff --git a/contrib/llvm/lib/Support/APFloat.cpp b/contrib/llvm/lib/Support/APFloat.cpp index 30f0deab90a0..4cfbbf8645e0 100644 --- a/contrib/llvm/lib/Support/APFloat.cpp +++ b/contrib/llvm/lib/Support/APFloat.cpp @@ -76,8 +76,12 @@ namespace llvm { compile-time arithmetic on PPC double-double numbers, it is not able to represent all possible values held by a PPC double-double number, for example: (long double) 1.0 + (long double) 0x1p-106 - Should this be replaced by a full emulation of PPC double-double? */ - static const fltSemantics semPPCDoubleDouble = {0, 0, 0, 0}; + Should this be replaced by a full emulation of PPC double-double? + + Note: we need to make the value different from semBogus as otherwise + an unsafe optimization may collapse both values to a single address, + and we heavily rely on them having distinct addresses. */ + static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0}; /* There are temporary semantics for the real PPCDoubleDouble implementation. Currently, APFloat of PPCDoubleDouble holds one PPCDoubleDoubleImpl as the diff --git a/contrib/llvm/lib/Support/Host.cpp b/contrib/llvm/lib/Support/Host.cpp index dd19eee15f62..49d0ed55a716 100644 --- a/contrib/llvm/lib/Support/Host.cpp +++ b/contrib/llvm/lib/Support/Host.cpp @@ -1069,6 +1069,7 @@ StringRef sys::getHostCPUName() { .Case("POWER7", "pwr7") .Case("POWER8", "pwr8") .Case("POWER8E", "pwr8") + .Case("POWER8NVL", "pwr8") .Case("POWER9", "pwr9") .Default(generic); } diff --git a/contrib/llvm/lib/Support/NativeFormatting.cpp b/contrib/llvm/lib/Support/NativeFormatting.cpp index bb8689141098..b951a88a38db 100644 --- a/contrib/llvm/lib/Support/NativeFormatting.cpp +++ b/contrib/llvm/lib/Support/NativeFormatting.cpp @@ -239,10 +239,7 @@ void llvm::write_double(raw_ostream &S, double N, FloatStyle Style, N *= 100.0; char Buf[32]; - unsigned Len; - Len = format(Spec.c_str(), N).snprint(Buf, sizeof(Buf)); - if (Style == FloatStyle::Percent) - ++Len; + format(Spec.c_str(), N).snprint(Buf, sizeof(Buf)); S << Buf; if (Style == FloatStyle::Percent) S << '%'; diff --git a/contrib/llvm/lib/Support/YAMLTraits.cpp b/contrib/llvm/lib/Support/YAMLTraits.cpp index 99d2070cb6ed..9849b3aa1ce9 100644 --- a/contrib/llvm/lib/Support/YAMLTraits.cpp +++ b/contrib/llvm/lib/Support/YAMLTraits.cpp @@ -118,6 +118,18 @@ void Input::beginMapping() { } } +std::vector Input::keys() { + MapHNode *MN = dyn_cast(CurrentNode); + std::vector Ret; + if (!MN) { + setError(CurrentNode, "not a mapping"); + return Ret; + } + for (auto &P : MN->Mapping) + Ret.push_back(P.first()); + return Ret; +} + bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault, void *&SaveInfo) { UseDefault = false; @@ -163,7 +175,7 @@ void Input::endMapping() { if (!MN) return; for (const 
auto &NN : MN->Mapping) { - if (!MN->isValidKey(NN.first())) { + if (!is_contained(MN->ValidKeys, NN.first())) { setError(NN.second.get(), Twine("unknown key '") + NN.first() + "'"); break; } @@ -373,14 +385,6 @@ std::unique_ptr Input::createHNodes(Node *N) { } } -bool Input::MapHNode::isValidKey(StringRef Key) { - for (const char *K : ValidKeys) { - if (Key.equals(K)) - return true; - } - return false; -} - void Input::setError(const Twine &Message) { this->setError(CurrentNode, Message); } @@ -451,6 +455,10 @@ void Output::endMapping() { StateStack.pop_back(); } +std::vector Output::keys() { + report_fatal_error("invalid call"); +} + bool Output::preflightKey(const char *Key, bool Required, bool SameAsDefault, bool &UseDefault, void *&) { UseDefault = false; diff --git a/contrib/llvm/lib/TableGen/StringMatcher.cpp b/contrib/llvm/lib/TableGen/StringMatcher.cpp index 16681702d1d6..0c83da65e19e 100644 --- a/contrib/llvm/lib/TableGen/StringMatcher.cpp +++ b/contrib/llvm/lib/TableGen/StringMatcher.cpp @@ -11,9 +11,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/TableGen/StringMatcher.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/StringMatcher.h" +#include #include +#include +#include +#include + using namespace llvm; /// FindFirstNonCommonLetter - Find the first character in the keys of the @@ -67,7 +73,7 @@ EmitStringMatcherForChar(const std::vector &Matches, } // Bucket the matches by the character we are comparing. - std::map > MatchesByLetter; + std::map> MatchesByLetter; for (unsigned i = 0, e = Matches.size(); i != e; ++i) MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]); @@ -91,7 +97,7 @@ EmitStringMatcherForChar(const std::vector &Matches, // FIXME: Need to escape general strings. OS << Indent << "if (memcmp(" << StrVariableName << ".data()+" << CharNo << ", \"" << Matches[0]->first.substr(CharNo, NumChars) << "\", " - << NumChars << "))\n"; + << NumChars << ") != 0)\n"; OS << Indent << " break;\n"; } @@ -103,7 +109,7 @@ EmitStringMatcherForChar(const std::vector &Matches, OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n"; OS << Indent << "default: break;\n"; - for (std::map >::iterator LI = + for (std::map>::iterator LI = MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) { // TODO: escape hard stuff (like \n) if we ever care about it. OS << Indent << "case '" << LI->first << "':\t // " @@ -118,7 +124,6 @@ EmitStringMatcherForChar(const std::vector &Matches, return true; } - /// Emit - Top level entry point. /// void StringMatcher::Emit(unsigned Indent) const { @@ -126,7 +131,7 @@ void StringMatcher::Emit(unsigned Indent) const { if (Matches.empty()) return; // First level categorization: group strings by length. 
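For orientation, StringMatcher turns a list of (key, code) pairs into nested switches: first on the string's length, then character by character, finishing with the memcmp whose comparison this patch spells as "!= 0". Reconstructed, roughly, for the hypothetical pairs ("add", "return ADD;") and ("sub", "return SUB;"), with Str the StrVariableName; exact comments and layout vary:

switch (Str.size()) {
default: break;
case 3:  // 2 strings to match.
  switch (Str[0]) {
  default: break;
  case 'a':  // 1 string to match.
    if (memcmp(Str.data()+1, "dd", 2) != 0)
      break;
    return ADD;  // "add"
  case 's':  // 1 string to match.
    if (memcmp(Str.data()+1, "ub", 2) != 0)
      break;
    return SUB;  // "sub"
  }
  break;
}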
- std::map > MatchesByLength; + std::map> MatchesByLength; for (unsigned i = 0, e = Matches.size(); i != e; ++i) MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]); @@ -136,7 +141,7 @@ void StringMatcher::Emit(unsigned Indent) const { OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n"; OS.indent(Indent*2+2) << "default: break;\n"; - for (std::map >::iterator LI = + for (std::map>::iterator LI = MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) { OS.indent(Indent*2+2) << "case " << LI->first << ":\t // " << LI->second.size() diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td index c40391d5ad9d..740766b151bb 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64.td @@ -264,9 +264,13 @@ def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", "Qualcomm Falkor processors", [ FeatureCRC, FeatureCrypto, + FeatureCustomCheapAsMoveHandling, FeatureFPARMv8, FeatureNEON, - FeaturePerfMon + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing ]>; def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan", diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index b2d96a32fd3a..efc221893782 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -76,7 +76,6 @@ public: void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); - void EmitXRayTable(); void EmitSled(const MachineInstr &MI, SledKind Kind); /// \brief tblgen'erated driver function for lowering simple MI->MC @@ -95,7 +94,7 @@ public: AArch64FI = F.getInfo(); STI = static_cast(&F.getSubtarget()); bool Result = AsmPrinter::runOnMachineFunction(F); - EmitXRayTable(); + emitXRayTable(); return Result; } @@ -150,59 +149,6 @@ void AArch64AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) EmitSled(MI, SledKind::TAIL_CALL); } -void AArch64AsmPrinter::EmitXRayTable() -{ - //TODO: merge the logic for ELF XRay sleds at a higher level, so to avoid - // code duplication as it is now for x86_64, ARM32 and AArch64. - if (Sleds.empty()) - return; - - auto PrevSection = OutStreamer->getCurrentSectionOnly(); - auto Fn = MF->getFunction(); - MCSection *Section; - - if (STI->isTargetELF()) { - if (Fn->hasComdat()) - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, - Fn->getComdat()->getName()); - else - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC); - } else if (STI->isTargetMachO()) { - Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, - SectionKind::getReadOnlyWithRel()); - } else { - llvm_unreachable("Unsupported target"); - } - - // Before we switch over, we force a reference to a label inside the - // xray_instr_map section. Since EmitXRayTable() is always called just - // before the function's end, we assume that this is happening after the - // last return instruction. - // - // We then align the reference to 16 byte boundaries, which we determined - // experimentally to be beneficial to avoid causing decoder stalls. 
- MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true); - OutStreamer->EmitCodeAlignment(16); - OutStreamer->EmitSymbolValue(Tmp, 8, false); - OutStreamer->SwitchSection(Section); - OutStreamer->EmitLabel(Tmp); - for (const auto &Sled : Sleds) { - OutStreamer->EmitSymbolValue(Sled.Sled, 8); - OutStreamer->EmitSymbolValue(CurrentFnSym, 8); - auto Kind = static_cast(Sled.Kind); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Kind), 1)); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Sled.AlwaysInstrument), 1)); - OutStreamer->EmitZeros(14); - } - OutStreamer->SwitchSection(PrevSection); - - Sleds.clear(); -} - void AArch64AsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) { static const int8_t NoopsInSledCount = 7; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index dcb05601e5f4..8a76c42b5898 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1470,6 +1470,9 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { bool IsUnscaled = TII->isUnscaledLdSt(MI); int Offset = getLdStOffsetOp(MI).getImm(); int OffsetStride = IsUnscaled ? getMemScale(MI) : 1; + // Allow one more for offset. + if (Offset > 0) + Offset -= OffsetStride; if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride)) return false; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index a87204d46eae..0b0a0e7d083e 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3048,6 +3048,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(KILL) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; NODE_NAME_CASE(SENDMSG) + NODE_NAME_CASE(SENDMSGHALT) NODE_NAME_CASE(INTERP_MOV) NODE_NAME_CASE(INTERP_P1) NODE_NAME_CASE(INTERP_P2) diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 5cc5efb331e3..745c9923de2e 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -313,6 +313,7 @@ enum NodeType : unsigned { /// Pointer to the start of the shader's constant data. 
CONST_DATA_PTR, SENDMSG, + SENDMSGHALT, INTERP_MOV, INTERP_P1, INTERP_P2, diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index e7b40016e272..f079c8d0c70c 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -266,6 +266,10 @@ def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG", SDTypeProfile<0, 1, [SDTCisInt<0>]>, [SDNPHasChain, SDNPInGlue]>; +def AMDGPUsendmsghalt : SDNode<"AMDGPUISD::SENDMSGHALT", + SDTypeProfile<0, 1, [SDTCisInt<0>]>, + [SDNPHasChain, SDNPInGlue]>; + def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV", SDTypeProfile<1, 3, [SDTCisFP<0>]>, [SDNPInGlue]>; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index fa53831cbe16..c78e97dfd46f 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2706,12 +2706,19 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, unsigned IntrinsicID = cast(Op.getOperand(1))->getZExtValue(); switch (IntrinsicID) { - case AMDGPUIntrinsic::SI_sendmsg: { + case AMDGPUIntrinsic::SI_sendmsg: + case Intrinsic::amdgcn_s_sendmsg: { Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3)); SDValue Glue = Chain.getValue(1); return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain, Op.getOperand(2), Glue); } + case Intrinsic::amdgcn_s_sendmsghalt: { + Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3)); + SDValue Glue = Chain.getValue(1); + return DAG.getNode(AMDGPUISD::SENDMSGHALT, DL, MVT::Other, Chain, + Op.getOperand(2), Glue); + } case AMDGPUIntrinsic::SI_tbuffer_store: { SDValue Ops[] = { Chain, diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp index 202a1e9ed8ac..fceabd7a8fdd 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -504,7 +504,7 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB, return; // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG. 
- if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) { + if (LastInstWritesM0 && (I->getOpcode() == AMDGPU::S_SENDMSG || I->getOpcode() == AMDGPU::S_SENDMSGHALT)) { BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0); LastInstWritesM0 = false; return; @@ -619,7 +619,8 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { // signalling other hardware blocks if ((I->getOpcode() == AMDGPU::S_BARRIER && ST->needWaitcntBeforeBarrier()) || - I->getOpcode() == AMDGPU::S_SENDMSG) + I->getOpcode() == AMDGPU::S_SENDMSG || + I->getOpcode() == AMDGPU::S_SENDMSGHALT) Required = LastIssued; else Required = handleOperands(*I); diff --git a/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td index 0aeb1297d3a7..73cd5774128e 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -828,9 +828,12 @@ let Uses = [EXEC, M0] in { def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16", [(AMDGPUsendmsg (i32 imm:$simm16))] >; + +def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16", + [(AMDGPUsendmsghalt (i32 imm:$simm16))] +>; } // End Uses = [EXEC, M0] -def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16">; def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">; def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> { let simm16 = 0; diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index f20768ab77a5..8ec9cb02813c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -164,9 +164,6 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Emit the rest of the function body. EmitFunctionBody(); - // Emit the XRay table for this function. - EmitXRayTable(); - // If we need V4T thumb mode Register Indirect Jump pads, emit them. // These are created per function, rather than per TU, since it's // relatively easy to exceed the thumb branch range within a TU. diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h index ce0b04d56d9e..93fed10eb2d0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h @@ -113,9 +113,6 @@ public: void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); - // Helper function that emits the XRay sleds we've collected for a particular - // function. 
- void EmitXRayTable(); private: void EmitSled(const MachineInstr &MI, SledKind Kind); diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp index 293a527b09e8..07044b9697b6 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp @@ -22,9 +22,6 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCStreamer.h" using namespace llvm; @@ -226,38 +223,3 @@ void ARMAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) { EmitSled(MI, SledKind::TAIL_CALL); } - -void ARMAsmPrinter::EmitXRayTable() -{ - if (Sleds.empty()) - return; - - MCSection *Section = nullptr; - if (Subtarget->isTargetELF()) { - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_GROUP | - ELF::SHF_MERGE, - 0, CurrentFnSym->getName()); - } else if (Subtarget->isTargetMachO()) { - Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, - SectionKind::getReadOnlyWithRel()); - } else { - llvm_unreachable("Unsupported target"); - } - - auto PrevSection = OutStreamer->getCurrentSectionOnly(); - OutStreamer->SwitchSection(Section); - for (const auto &Sled : Sleds) { - OutStreamer->EmitSymbolValue(Sled.Sled, 4); - OutStreamer->EmitSymbolValue(CurrentFnSym, 4); - auto Kind = static_cast(Sled.Kind); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Kind), 1)); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Sled.AlwaysInstrument), 1)); - OutStreamer->EmitZeros(6); - } - OutStreamer->SwitchSection(PrevSection); - - Sleds.clear(); -} diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp index c0591c332dea..963fb99ce09b 100644 --- a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp +++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp @@ -53,28 +53,36 @@ // // The code below is intended to be fully target-independent. +#include "BitTracker.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" - -#include "BitTracker.h" +#include +#include +#include using namespace llvm; typedef BitTracker BT; namespace { + // Local trickery to pretty print a register (without the whole "%vreg" // business). 
struct printv { printv(unsigned r) : R(r) {} + unsigned R; }; + raw_ostream &operator<< (raw_ostream &OS, const printv &PV) { if (PV.R) OS << 'v' << TargetRegisterInfo::virtReg2Index(PV.R); @@ -82,9 +90,11 @@ namespace { OS << 's'; return OS; } -} + +} // end anonymous namespace namespace llvm { + raw_ostream &operator<<(raw_ostream &OS, const BT::BitValue &BV) { switch (BV.Type) { case BT::BitValue::Top: @@ -167,14 +177,14 @@ namespace llvm { return OS; } -} + +} // end namespace llvm void BitTracker::print_cells(raw_ostream &OS) const { for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n"; } - BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F) : Trace(false), ME(E), MF(F), MRI(F.getRegInfo()), Map(*new CellMapType) {} @@ -182,7 +192,6 @@ BitTracker::~BitTracker() { delete ⤅ } - // If we were allowed to update a cell for a part of a register, the meet // operation would need to be parametrized by the register number and the // exact part of the register, so that the computer BitRefs correspond to @@ -201,7 +210,6 @@ bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) { return Changed; } - // Insert the entire cell RC into the current cell at position given by M. BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC, const BitMask &M) { @@ -224,7 +232,6 @@ BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC, return *this; } - BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const { uint16_t B = M.first(), E = M.last(), W = width(); assert(B < W && E < W); @@ -243,7 +250,6 @@ BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const { return RC; } - BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) { // Rotate left (i.e. towards increasing bit indices). // Swap the two parts: [0..W-Sh-1] [W-Sh..W-1] @@ -265,7 +271,6 @@ BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) { return *this; } - BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E, const BitValue &V) { assert(B <= E); @@ -274,7 +279,6 @@ BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E, return *this; } - BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) { // Append the cell given as the argument to the "this" cell. // Bit 0 of RC becomes bit W of the result, where W is this->width(). @@ -285,7 +289,6 @@ BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) { return *this; } - uint16_t BT::RegisterCell::ct(bool B) const { uint16_t W = width(); uint16_t C = 0; @@ -295,7 +298,6 @@ uint16_t BT::RegisterCell::ct(bool B) const { return C; } - uint16_t BT::RegisterCell::cl(bool B) const { uint16_t W = width(); uint16_t C = 0; @@ -305,7 +307,6 @@ uint16_t BT::RegisterCell::cl(bool B) const { return C; } - bool BT::RegisterCell::operator== (const RegisterCell &RC) const { uint16_t W = Bits.size(); if (RC.Bits.size() != W) @@ -316,7 +317,6 @@ bool BT::RegisterCell::operator== (const RegisterCell &RC) const { return true; } - uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const { // The general problem is with finding a register class that corresponds // to a given reference reg:sub. 
There can be several such classes, and @@ -342,7 +342,6 @@ uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const { return BW; } - BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR, const CellMapType &M) const { uint16_t BW = getRegBitWidth(RR); @@ -370,7 +369,6 @@ BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR, return RegisterCell::top(BW); } - void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const { // While updating the cell map can be done in a meaningful way for @@ -388,7 +386,6 @@ void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC, M[RR.Reg] = RC; } - // Check if the cell represents a compile-time integer value. bool BT::MachineEvaluator::isInt(const RegisterCell &A) const { uint16_t W = A.width(); @@ -398,7 +395,6 @@ bool BT::MachineEvaluator::isInt(const RegisterCell &A) const { return true; } - // Convert a cell to the integer value. The result must fit in uint64_t. uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const { assert(isInt(A)); @@ -411,7 +407,6 @@ uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const { return Val; } - // Evaluator helper functions. These implement some common operation on // register cells that can be used to implement target-specific instructions // in a target-specific evaluator. @@ -426,7 +421,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(int64_t V, uint16_t W) const { return Res; } - BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const { const APInt &A = CI->getValue(); uint16_t BW = A.getBitWidth(); @@ -437,7 +431,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const { return Res; } - BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -471,7 +464,6 @@ BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -505,29 +497,26 @@ BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eMLS(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width() + A2.width(); - uint16_t Z = A1.ct(0) + A2.ct(0); + uint16_t Z = A1.ct(false) + A2.ct(false); RegisterCell Res(W); Res.fill(0, Z, BitValue::Zero); Res.fill(Z, W, BitValue::self()); return Res; } - BT::RegisterCell BT::MachineEvaluator::eMLU(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width() + A2.width(); - uint16_t Z = A1.ct(0) + A2.ct(0); + uint16_t Z = A1.ct(false) + A2.ct(false); RegisterCell Res(W); Res.fill(0, Z, BitValue::Zero); Res.fill(Z, W, BitValue::self()); return Res; } - BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1, uint16_t Sh) const { assert(Sh <= A1.width()); @@ -537,7 +526,6 @@ BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1, uint16_t Sh) const { uint16_t W = A1.width(); @@ -548,7 +536,6 @@ BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1, uint16_t Sh) const { uint16_t W = A1.width(); @@ -560,7 +547,6 @@ BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1, return Res; } - BT::RegisterCell 
BT::MachineEvaluator::eAND(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -583,7 +569,6 @@ BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -606,7 +591,6 @@ BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -627,7 +611,6 @@ BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const { uint16_t W = A1.width(); RegisterCell Res(W); @@ -643,7 +626,6 @@ BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const { return Res; } - BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1, uint16_t BitN) const { assert(BitN < A1.width()); @@ -652,7 +634,6 @@ BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1, uint16_t BitN) const { assert(BitN < A1.width()); @@ -661,7 +642,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B, uint16_t W) const { uint16_t C = A1.cl(B), AW = A1.width(); @@ -672,7 +652,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B, return RegisterCell::self(0, W); } - BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B, uint16_t W) const { uint16_t C = A1.ct(B), AW = A1.width(); @@ -683,7 +662,6 @@ BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B, return RegisterCell::self(0, W); } - BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1, uint16_t FromN) const { uint16_t W = A1.width(); @@ -695,7 +673,6 @@ BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1, uint16_t FromN) const { uint16_t W = A1.width(); @@ -705,7 +682,6 @@ BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1, uint16_t B, uint16_t E) const { uint16_t W = A1.width(); @@ -718,7 +694,6 @@ BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1, const RegisterCell &A2, uint16_t AtN) const { uint16_t W1 = A1.width(), W2 = A2.width(); @@ -731,7 +706,6 @@ BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1, return Res; } - BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const { assert(Sub == 0 && "Generic BitTracker::mask called for Sub != 0"); uint16_t W = getRegBitWidth(Reg); @@ -785,7 +759,6 @@ bool BT::MachineEvaluator::evaluate(const MachineInstr &MI, return true; } - // Main W-Z implementation. 
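eAND and eORL above can keep bits known even when an input bit is unknown, because 0 is absorbing for AND and 1 for OR. A minimal sketch of that rule over a Zero/One/Top domain, a simplification of BitTracker's BitValue (which additionally has Ref bits pointing at other registers):

```cpp
#include <cassert>

enum class Bit { Zero, One, Top }; // Top = unknown

// Three-valued AND: a known Zero forces the result regardless of the
// other operand, so Top does not always poison the output.
static Bit and3(Bit A, Bit B) {
  if (A == Bit::Zero || B == Bit::Zero)
    return Bit::Zero;
  if (A == Bit::One && B == Bit::One)
    return Bit::One;
  return Bit::Top;
}

int main() {
  assert(and3(Bit::Zero, Bit::Top) == Bit::Zero); // known despite Top input
  assert(and3(Bit::One, Bit::Top) == Bit::Top);   // genuinely unknown
}
```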
void BT::visitPHI(const MachineInstr &PI) { @@ -977,7 +950,6 @@ void BT::visitBranchesFrom(const MachineInstr &BI) { } } - void BT::visitUsesOf(unsigned Reg) { if (Trace) dbgs() << "visiting uses of " << PrintReg(Reg, &ME.TRI) << "\n"; @@ -997,17 +969,14 @@ void BT::visitUsesOf(unsigned Reg) { } } - BT::RegisterCell BT::get(RegisterRef RR) const { return ME.getCell(RR, Map); } - void BT::put(RegisterRef RR, const RegisterCell &RC) { ME.putCell(RR, RC, Map); } - // Replace all references to bits from OldRR with the corresponding bits // in NewRR. void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { @@ -1033,7 +1002,6 @@ void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { } } - // Check if the block has been "executed" during propagation. (If not, the // block is dead, but it may still appear to be reachable.) bool BT::reached(const MachineBasicBlock *B) const { @@ -1047,7 +1015,6 @@ bool BT::reached(const MachineBasicBlock *B) const { return false; } - // Visit an individual instruction. This could be a newly added instruction, // or one that has been modified by an optimization. void BT::visit(const MachineInstr &MI) { @@ -1061,14 +1028,12 @@ void BT::visit(const MachineInstr &MI) { FlowQ.pop(); } - void BT::reset() { EdgeExec.clear(); InstrExec.clear(); Map.clear(); } - void BT::run() { reset(); assert(FlowQ.empty()); @@ -1141,4 +1106,3 @@ void BT::run() { if (Trace) print_cells(dbgs() << "Cells after propagation:\n"); } - diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.h b/contrib/llvm/lib/Target/Hexagon/BitTracker.h index 74cafcd00b60..48c5f2266acf 100644 --- a/contrib/llvm/lib/Target/Hexagon/BitTracker.h +++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.h @@ -1,4 +1,4 @@ -//===--- BitTracker.h -----------------------------------------------------===// +//===--- BitTracker.h -------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,24 +7,27 @@ // //===----------------------------------------------------------------------===// -#ifndef BITTRACKER_H -#define BITTRACKER_H +#ifndef LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H +#define LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" - +#include "llvm/CodeGen/MachineOperand.h" +#include +#include #include #include #include +#include namespace llvm { - class ConstantInt; - class MachineRegisterInfo; - class MachineBasicBlock; - class MachineInstr; - class MachineOperand; - class raw_ostream; + +class ConstantInt; +class MachineRegisterInfo; +class MachineBasicBlock; +class MachineInstr; +class raw_ostream; struct BitTracker { struct BitRef; @@ -76,19 +79,19 @@ private: CellMapType ⤅ }; - // Abstraction of a reference to bit at position Pos from a register Reg. struct BitTracker::BitRef { BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {} + bool operator== (const BitRef &BR) const { // If Reg is 0, disregard Pos. return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos); } + unsigned Reg; uint16_t Pos; }; - // Abstraction of a register reference in MachineOperand. It contains the // register number and the subregister index. struct BitTracker::RegisterRef { @@ -96,10 +99,10 @@ struct BitTracker::RegisterRef { : Reg(R), Sub(S) {} RegisterRef(const MachineOperand &MO) : Reg(MO.getReg()), Sub(MO.getSubReg()) {} + unsigned Reg, Sub; }; - // Value that a single bit can take. 
This is outside of the context of // any register, it is more of an abstraction of the two-element set of // possible bit values. One extension here is the "Ref" type, which @@ -158,6 +161,7 @@ struct BitTracker::BitValue { bool operator!= (const BitValue &V) const { return !operator==(V); } + bool is(unsigned T) const { assert(T == 0 || T == 1); return T == 0 ? Type == Zero @@ -209,6 +213,7 @@ struct BitTracker::BitValue { bool num() const { return Type == Zero || Type == One; } + operator bool() const { assert(Type == Zero || Type == One); return Type == One; @@ -217,7 +222,6 @@ struct BitTracker::BitValue { friend raw_ostream &operator<<(raw_ostream &OS, const BitValue &BV); }; - // This operation must be idempotent, i.e. ref(ref(V)) == ref(V). inline BitTracker::BitValue BitTracker::BitValue::ref(const BitValue &V) { @@ -228,25 +232,25 @@ BitTracker::BitValue::ref(const BitValue &V) { return self(); } - inline BitTracker::BitValue BitTracker::BitValue::self(const BitRef &Self) { return BitValue(Self.Reg, Self.Pos); } - // A sequence of bits starting from index B up to and including index E. // If E < B, the mask represents two sections: [0..E] and [B..W) where // W is the width of the register. struct BitTracker::BitMask { - BitMask() : B(0), E(0) {} + BitMask() = default; BitMask(uint16_t b, uint16_t e) : B(b), E(e) {} + uint16_t first() const { return B; } uint16_t last() const { return E; } -private: - uint16_t B, E; -}; +private: + uint16_t B = 0; + uint16_t E = 0; +}; // Representation of a register: a list of BitValues. struct BitTracker::RegisterCell { @@ -255,6 +259,7 @@ struct BitTracker::RegisterCell { uint16_t width() const { return Bits.size(); } + const BitValue &operator[](uint16_t BitN) const { assert(BitN < Bits.size()); return Bits[BitN]; @@ -297,12 +302,10 @@ private: friend raw_ostream &operator<<(raw_ostream &OS, const RegisterCell &RC); }; - inline bool BitTracker::has(unsigned Reg) const { return Map.find(Reg) != Map.end(); } - inline const BitTracker::RegisterCell& BitTracker::lookup(unsigned Reg) const { CellMapType::const_iterator F = Map.find(Reg); @@ -310,7 +313,6 @@ BitTracker::lookup(unsigned Reg) const { return F->second; } - inline BitTracker::RegisterCell BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) { RegisterCell RC(Width); @@ -319,7 +321,6 @@ BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) { return RC; } - inline BitTracker::RegisterCell BitTracker::RegisterCell::top(uint16_t Width) { RegisterCell RC(Width); @@ -328,7 +329,6 @@ BitTracker::RegisterCell::top(uint16_t Width) { return RC; } - inline BitTracker::RegisterCell BitTracker::RegisterCell::ref(const RegisterCell &C) { uint16_t W = C.width(); @@ -345,12 +345,13 @@ BitTracker::RegisterCell::ref(const RegisterCell &C) { struct BitTracker::MachineEvaluator { MachineEvaluator(const TargetRegisterInfo &T, MachineRegisterInfo &M) : TRI(T), MRI(M) {} - virtual ~MachineEvaluator() {} + virtual ~MachineEvaluator() = default; uint16_t getRegBitWidth(const RegisterRef &RR) const; RegisterCell getCell(const RegisterRef &RR, const CellMapType &M) const; void putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const; + // A result of any operation should use refs to the source cells, not // the cells directly. This function is a convenience wrapper to quickly // generate a ref for a cell corresponding to a register reference. 
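The BitMask change above is a routine C++11 modernization: a hand-written zeroing constructor becomes in-class member initializers plus a defaulted constructor. Behavior is identical, and the defaults now sit next to the members they initialize. The idiom in isolation (illustrative type, not the real BitMask):

```cpp
#include <cassert>
#include <cstdint>

struct Mask {
  Mask() = default; // uses the member initializers below
  Mask(uint16_t b, uint16_t e) : B(b), E(e) {}

  uint16_t first() const { return B; }
  uint16_t last() const { return E; }

private:
  uint16_t B = 0;
  uint16_t E = 0;
};

int main() {
  Mask M; // default-constructed, identical to the old B(0), E(0) form
  assert(M.first() == 0 && M.last() == 0);
}
```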
@@ -435,4 +436,4 @@ struct BitTracker::MachineEvaluator { } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp index b78c4126e0b1..436f88dcd450 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -7,16 +7,30 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - #include "Hexagon.h" +#include "HexagonBitTracker.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonTargetMachine.h" -#include "HexagonBitTracker.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include +#include +#include +#include +#include +#include using namespace llvm; @@ -76,11 +90,11 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri, } } - BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const { + using namespace Hexagon; + if (Sub == 0) return MachineEvaluator::mask(Reg, 0); - using namespace Hexagon; const TargetRegisterClass *RC = MRI.getRegClass(Reg); unsigned ID = RC->getID(); uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub)); @@ -102,6 +116,7 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const { } namespace { + class RegisterRefs { std::vector Vector; @@ -117,17 +132,21 @@ public: } size_t size() const { return Vector.size(); } + const BT::RegisterRef &operator[](unsigned n) const { // The main purpose of this operator is to assert with bad argument. assert(n < Vector.size()); return Vector[n]; } }; -} + +} // end anonymous namespace bool HexagonEvaluator::evaluate(const MachineInstr &MI, const CellMapType &Inputs, CellMapType &Outputs) const { + using namespace Hexagon; + unsigned NumDefs = 0; // Sanity verification: there should not be any defs with subregisters. @@ -142,7 +161,6 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, if (NumDefs == 0) return false; - using namespace Hexagon; unsigned Opc = MI.getOpcode(); if (MI.mayLoad()) { @@ -779,10 +797,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, case S2_cl0: case S2_cl0p: // Always produce a 32-bit result. 
- return rr0(eCLB(rc(1), 0/*bit*/, 32), Outputs); + return rr0(eCLB(rc(1), false/*bit*/, 32), Outputs); case S2_cl1: case S2_cl1p: - return rr0(eCLB(rc(1), 1/*bit*/, 32), Outputs); + return rr0(eCLB(rc(1), true/*bit*/, 32), Outputs); case S2_clb: case S2_clbp: { uint16_t W1 = getRegBitWidth(Reg[1]); @@ -794,10 +812,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, } case S2_ct0: case S2_ct0p: - return rr0(eCTB(rc(1), 0/*bit*/, 32), Outputs); + return rr0(eCTB(rc(1), false/*bit*/, 32), Outputs); case S2_ct1: case S2_ct1p: - return rr0(eCTB(rc(1), 1/*bit*/, 32), Outputs); + return rr0(eCTB(rc(1), true/*bit*/, 32), Outputs); case S5_popcountp: // TODO break; @@ -953,6 +971,8 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI, bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI, const CellMapType &Inputs, CellMapType &Outputs) const { + using namespace Hexagon; + if (TII.isPredicated(MI)) return false; assert(MI.mayLoad() && "A load that mayn't?"); @@ -960,7 +980,6 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI, uint16_t BitNum; bool SignEx; - using namespace Hexagon; switch (Opc) { default: @@ -1141,9 +1160,9 @@ bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr &MI, return true; } - unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const { using namespace Hexagon; + bool Is64 = DoubleRegsRegClass.contains(PReg); assert(PReg == 0 || Is64 || IntRegsRegClass.contains(PReg)); @@ -1180,7 +1199,6 @@ unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const { return (Idx64+1 < Num64) ? Phys64[Idx64+1] : 0; } - unsigned HexagonEvaluator::getVirtRegFor(unsigned PReg) const { typedef MachineRegisterInfo::livein_iterator iterator; for (iterator I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h index 9e7b1dbe298f..2cbf65e66ca6 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.h @@ -1,4 +1,4 @@ -//===--- HexagonBitTracker.h ----------------------------------------------===// +//===--- HexagonBitTracker.h ------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,15 +7,17 @@ // //===----------------------------------------------------------------------===// -#ifndef HEXAGONBITTRACKER_H -#define HEXAGONBITTRACKER_H +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H #include "BitTracker.h" #include "llvm/ADT/DenseMap.h" +#include namespace llvm { - class HexagonInstrInfo; - class HexagonRegisterInfo; + +class HexagonInstrInfo; +class HexagonRegisterInfo; struct HexagonEvaluator : public BitTracker::MachineEvaluator { typedef BitTracker::CellMapType CellMapType; @@ -49,10 +51,12 @@ private: // Type of formal parameter extension. struct ExtType { enum { SExt, ZExt }; - char Type; - uint16_t Width; - ExtType() : Type(0), Width(0) {} + + ExtType() = default; ExtType(char t, uint16_t w) : Type(t), Width(w) {} + + char Type = 0; + uint16_t Width = 0; }; // Map VR -> extension type. 
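Several call sites above (ct, eCLB, eCTB) switch literal 0/1 arguments to false/true where the parameter is bool. Both compile, but the bool literal states intent at the call site and stays correct if an int overload ever appears. A minimal illustration with an invented function:

```cpp
#include <iostream>

// The parameter selects which bit value to count, so it is a bool.
static void countBits(bool CountOnes) {
  std::cout << (CountOnes ? "counting ones\n" : "counting zeros\n");
}

int main() {
  countBits(0);     // legal via int-to-bool conversion, reads like a magic number
  countBits(false); // same call, self-documenting
}
```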
typedef DenseMap RegExtMap; @@ -61,4 +65,4 @@ private: } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 34ce3e652995..0a7dc6b49d00 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -11,26 +11,45 @@ // //===----------------------------------------------------------------------===// +#include "Hexagon.h" #include "HexagonHazardRecognizer.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include #include +#include +#include +#include using namespace llvm; @@ -108,19 +127,16 @@ HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST) : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP), RI() {} - static bool isIntRegForSubInst(unsigned Reg) { return (Reg >= Hexagon::R0 && Reg <= Hexagon::R7) || (Reg >= Hexagon::R16 && Reg <= Hexagon::R23); } - static bool isDblRegForSubInst(unsigned Reg, const HexagonRegisterInfo &HRI) { return isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::isub_lo)) && isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::isub_hi)); } - /// Calculate number of instructions excluding the debug instructions. static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB, MachineBasicBlock::const_instr_iterator MIE) { @@ -132,7 +148,6 @@ static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB, return Count; } - /// Find the hardware loop instruction used to set-up the specified loop. /// On Hexagon, we have two instructions used to set-up the hardware loop /// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions @@ -164,17 +179,16 @@ static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp, return &*I; // We've reached a different loop, which means the loop0 has been removed. if (Opc == EndLoopOp) - return 0; + return nullptr; } // Check the predecessors for the LOOP instruction. MachineInstr *loop = findLoopInstr(*PB, EndLoopOp, Visited); if (loop) return loop; } - return 0; + return nullptr; } - /// Gather register def/uses from MI. 
/// This treats possible (predicated) defs as actually happening ones /// (conservatively). @@ -201,7 +215,6 @@ static inline void parseOperands(const MachineInstr &MI, } } - // Position dependent, so check twice for swap. static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) { switch (Ga) { @@ -228,8 +241,6 @@ static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) { return false; } - - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If @@ -280,7 +291,6 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, return 0; } - /// isStoreToStackSlot - If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the loaded stack slot. If @@ -337,7 +347,6 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr &MI, return 0; } - /// This function can analyze one/two way branching only and should (mostly) be /// called by target independent side. /// First entry is always the opcode of the branching instruction, except when @@ -401,7 +410,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, // Delete the J2_jump if it's equivalent to a fall-through. if (AllowModify && JumpToBlock && MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { - DEBUG(dbgs()<< "\nErasing the jump to successor block\n";); + DEBUG(dbgs() << "\nErasing the jump to successor block\n";); I->eraseFromParent(); I = MBB.instr_end(); if (I == MBB.instr_begin()) @@ -415,7 +424,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineInstr *LastInst = &*I; MachineInstr *SecondLastInst = nullptr; // Find one more terminator if present. 
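findLoopInstr in the hunks above now returns nullptr rather than 0. Both yield a null MachineInstr*, but nullptr is the type-safe spelling: it can never be mistaken for an integer during overload resolution. A self-contained demonstration of the difference:

```cpp
#include <cassert>

static int pick(int) { return 1; }
static int pick(int *) { return 2; }

int main() {
  assert(pick(0) == 1);       // literal 0 is an int first, not a null pointer
  assert(pick(nullptr) == 2); // nullptr always selects the pointer overload
}
```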
- for (;;) { + while (true) { if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(*I)) { if (!SecondLastInst) SecondLastInst = &*I; @@ -524,7 +533,6 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return true; } - unsigned HexagonInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { assert(!BytesRemoved && "code size not handled"); @@ -730,7 +738,6 @@ bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, return nonDbgBBSize(&MBB) <= 3; } - bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB, unsigned NumFCycles, unsigned ExtraFCycles, BranchProbability Probability) @@ -738,7 +745,6 @@ bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, return nonDbgBBSize(&TMBB) <= 3 && nonDbgBBSize(&FMBB) <= 3; } - bool HexagonInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs, BranchProbability Probability) const { return NumInstrs <= 4; @@ -853,7 +859,6 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, llvm_unreachable("Unimplemented"); } - void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { @@ -976,7 +981,6 @@ void HexagonInstrInfo::loadRegFromStackSlot( } } - static void getLiveRegsAt(LivePhysRegs &Regs, const MachineInstr &MI) { const MachineBasicBlock &B = *MI.getParent(); Regs.addLiveOuts(B); @@ -1307,7 +1311,6 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return false; } - // We indicate that we want to reverse the branch by // inserting the reversed branching opcode. bool HexagonInstrInfo::reverseBranchCondition( @@ -1325,19 +1328,16 @@ bool HexagonInstrInfo::reverseBranchCondition( return false; } - void HexagonInstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { DebugLoc DL; BuildMI(MBB, MI, DL, get(Hexagon::A2_nop)); } - bool HexagonInstrInfo::isPostIncrement(const MachineInstr &MI) const { return getAddrMode(MI) == HexagonII::PostInc; } - // Returns true if an instruction is predicated irrespective of the predicate // sense. For example, all of the following will return true. // if (p0) R1 = add(R2, R3) @@ -1351,7 +1351,6 @@ bool HexagonInstrInfo::isPredicated(const MachineInstr &MI) const { return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask; } - bool HexagonInstrInfo::PredicateInstruction( MachineInstr &MI, ArrayRef Cond) const { if (Cond.empty() || isNewValueJump(Cond[0].getImm()) || @@ -1403,14 +1402,12 @@ bool HexagonInstrInfo::PredicateInstruction( return true; } - bool HexagonInstrInfo::SubsumesPredicate(ArrayRef Pred1, ArrayRef Pred2) const { // TODO: Fix this return false; } - bool HexagonInstrInfo::DefinesPredicate( MachineInstr &MI, std::vector &Pred) const { auto &HRI = getRegisterInfo(); @@ -1427,7 +1424,6 @@ bool HexagonInstrInfo::DefinesPredicate( return false; } - bool HexagonInstrInfo::isPredicable(MachineInstr &MI) const { return MI.getDesc().isPredicable(); } @@ -1466,7 +1462,6 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr &MI, return false; } - /// Measure the specified inline asm to determine an approximation of its /// length. 
/// Comments (which run till the next SeparatorString or newline) do not @@ -1502,7 +1497,6 @@ unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str, return Length; } - ScheduleHazardRecognizer* HexagonInstrInfo::CreateTargetPostRAHazardRecognizer( const InstrItineraryData *II, const ScheduleDAG *DAG) const { @@ -1513,7 +1507,6 @@ HexagonInstrInfo::CreateTargetPostRAHazardRecognizer( return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); } - /// \brief For a comparison instruction, return the source registers in /// \p SrcReg and \p SrcReg2 if having two register operands, and the value it /// compares against in CmpValue. Return true if the comparison instruction @@ -1609,14 +1602,12 @@ unsigned HexagonInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, return getInstrTimingClassLatency(ItinData, MI); } - DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState( const TargetSubtargetInfo &STI) const { const InstrItineraryData *II = STI.getInstrItineraryData(); return static_cast(STI).createDFAPacketizer(II); } - // Inspired by this pair: // %R13 = L2_loadri_io %R29, 136; mem:LD4[FixedStack0] // S2_storeri_io %R29, 132, %R1; flags: mem:ST4[FixedStack1] @@ -1661,7 +1652,6 @@ bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint( return false; } - /// If the instruction is an increment of a constant value, return the amount. bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI, int &Value) const { @@ -1677,7 +1667,6 @@ bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI, return false; } - unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const { MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetRegisterClass *TRC; @@ -1695,18 +1684,15 @@ unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const { return NewReg; } - bool HexagonInstrInfo::isAbsoluteSet(const MachineInstr &MI) const { return (getAddrMode(MI) == HexagonII::AbsoluteSet); } - bool HexagonInstrInfo::isAccumulator(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask); } - bool HexagonInstrInfo::isComplex(const MachineInstr &MI) const { const MachineFunction *MF = MI.getParent()->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -1727,13 +1713,11 @@ bool HexagonInstrInfo::isComplex(const MachineInstr &MI) const { return false; } - // Return true if the instruction is a compund branch instruction. bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr &MI) const { return (getType(MI) == HexagonII::TypeCOMPOUND && MI.isBranch()); } - bool HexagonInstrInfo::isCondInst(const MachineInstr &MI) const { return (MI.isBranch() && isPredicated(MI)) || isConditionalTransfer(MI) || @@ -1744,7 +1728,6 @@ bool HexagonInstrInfo::isCondInst(const MachineInstr &MI) const { !isPredicatedNew(MI)); } - bool HexagonInstrInfo::isConditionalALU32(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::A2_paddf: @@ -1802,7 +1785,6 @@ bool HexagonInstrInfo::isConditionalALU32(const MachineInstr &MI) const { return false; } - // FIXME - Function name and it's functionality don't match. // It should be renamed to hasPredNewOpcode() bool HexagonInstrInfo::isConditionalLoad(const MachineInstr &MI) const { @@ -1814,7 +1796,6 @@ bool HexagonInstrInfo::isConditionalLoad(const MachineInstr &MI) const { return PNewOpcode >= 0; } - // Returns true if an instruction is a conditional store. 
// // Note: It doesn't include conditional new-value stores as they can't be @@ -1872,7 +1853,6 @@ bool HexagonInstrInfo::isConditionalStore(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::A2_tfrt: @@ -1893,7 +1873,6 @@ bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr &MI) const { return false; } - // TODO: In order to have isExtendable for fpimm/f32Ext, we need to handle // isFPImm and later getFPImm as well. bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const { @@ -1942,7 +1921,6 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const { return (ImmValue < MinValue || ImmValue > MaxValue); } - bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::L4_return : @@ -1957,7 +1935,6 @@ bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const { return false; } - // Return true when ConsMI uses a register defined by ProdMI. bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI, const MachineInstr &ConsMI) const { @@ -1994,7 +1971,6 @@ bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI, return false; } - // Returns true if the instruction is alread a .cur. bool HexagonInstrInfo::isDotCurInst(const MachineInstr &MI) const { switch (MI.getOpcode()) { @@ -2007,7 +1983,6 @@ bool HexagonInstrInfo::isDotCurInst(const MachineInstr &MI) const { return false; } - // Returns true, if any one of the operands is a dot new // insn, whether it is predicated dot new or register dot new. bool HexagonInstrInfo::isDotNewInst(const MachineInstr &MI) const { @@ -2017,7 +1992,6 @@ bool HexagonInstrInfo::isDotNewInst(const MachineInstr &MI) const { return false; } - /// Symmetrical. See if these two instructions are fit for duplex pair. bool HexagonInstrInfo::isDuplexPair(const MachineInstr &MIa, const MachineInstr &MIb) const { @@ -2026,7 +2000,6 @@ bool HexagonInstrInfo::isDuplexPair(const MachineInstr &MIa, return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG)); } - bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr &MI) const { if (MI.mayLoad() || MI.mayStore() || MI.isCompare()) return true; @@ -2038,13 +2011,11 @@ bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isEndLoopN(unsigned Opcode) const { return (Opcode == Hexagon::ENDLOOP0 || Opcode == Hexagon::ENDLOOP1); } - bool HexagonInstrInfo::isExpr(unsigned OpType) const { switch(OpType) { case MachineOperand::MO_MachineBasicBlock: @@ -2059,7 +2030,6 @@ bool HexagonInstrInfo::isExpr(unsigned OpType) const { } } - bool HexagonInstrInfo::isExtendable(const MachineInstr &MI) const { const MCInstrDesc &MID = MI.getDesc(); const uint64_t F = MID.TSFlags; @@ -2079,7 +2049,6 @@ bool HexagonInstrInfo::isExtendable(const MachineInstr &MI) const { return false; } - // This returns true in two cases: // - The OP code itself indicates that this is an extended instruction. // - One of MOs has been marked with HMOTF_ConstExtended flag. @@ -2098,14 +2067,12 @@ bool HexagonInstrInfo::isExtended(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isFloat(const MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::FPPos) & HexagonII::FPMask; } - // No V60 HVX VMEM with A_INDIRECT. 
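isConstExtended above ultimately reduces to a range test: an immediate needs a constant extender exactly when it cannot be encoded in the instruction's own field, i.e. when it falls outside [MinValue, MaxValue]. The shape of that test, with an invented field width for the example values:

```cpp
#include <cassert>
#include <cstdint>

static bool needsConstExtender(int64_t Imm, int64_t MinValue,
                               int64_t MaxValue) {
  return Imm < MinValue || Imm > MaxValue;
}

int main() {
  // An 8-bit signed field encodes [-128, 127] without help.
  assert(!needsConstExtender(100, -128, 127));
  assert(needsConstExtender(1000, -128, 127)); // requires the extender word
}
```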
bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr &I, const MachineInstr &J) const { @@ -2116,7 +2083,6 @@ bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr &I, return J.isIndirectBranch() || isIndirectCall(J) || isIndirectL4Return(J); } - bool HexagonInstrInfo::isIndirectCall(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::J2_callr : @@ -2128,7 +2094,6 @@ bool HexagonInstrInfo::isIndirectCall(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::L4_return : @@ -2143,7 +2108,6 @@ bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isJumpR(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::J2_jumpr : @@ -2158,7 +2122,6 @@ bool HexagonInstrInfo::isJumpR(const MachineInstr &MI) const { return false; } - // Return true if a given MI can accommodate given offset. // Use abs estimate as oppose to the exact number. // TODO: This will need to be changed to use MC level @@ -2203,7 +2166,6 @@ bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr &MI, } } - bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI, const MachineInstr &ESMI) const { bool isLate = isLateResultInstr(LRMI); @@ -2222,7 +2184,6 @@ bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI, return false; } - bool HexagonInstrInfo::isLateResultInstr(const MachineInstr &MI) const { switch (MI.getOpcode()) { case TargetOpcode::EXTRACT_SUBREG: @@ -2259,14 +2220,12 @@ bool HexagonInstrInfo::isLateResultInstr(const MachineInstr &MI) const { return true; } - bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr &MI) const { // Instructions with iclass A_CVI_VX and attribute A_CVI_LATE uses a multiply // resource, but all operands can be received late like an ALU instruction. 
return MI.getDesc().getSchedClass() == Hexagon::Sched::CVI_VX_LATE; } - bool HexagonInstrInfo::isLoopN(const MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); return Opcode == Hexagon::J2_loop0i || @@ -2279,7 +2238,6 @@ bool HexagonInstrInfo::isLoopN(const MachineInstr &MI) const { Opcode == Hexagon::J2_loop1rext; } - bool HexagonInstrInfo::isMemOp(const MachineInstr &MI) const { switch (MI.getOpcode()) { default: return false; @@ -2312,46 +2270,38 @@ bool HexagonInstrInfo::isMemOp(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isNewValue(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; } - bool HexagonInstrInfo::isNewValue(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; } - bool HexagonInstrInfo::isNewValueInst(const MachineInstr &MI) const { return isNewValueJump(MI) || isNewValueStore(MI); } - bool HexagonInstrInfo::isNewValueJump(const MachineInstr &MI) const { return isNewValue(MI) && MI.isBranch(); } - bool HexagonInstrInfo::isNewValueJump(unsigned Opcode) const { return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode); } - bool HexagonInstrInfo::isNewValueStore(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; } - bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; } - // Returns true if a particular operand is extendable for an instruction. bool HexagonInstrInfo::isOperandExtended(const MachineInstr &MI, unsigned OperandNum) const { @@ -2360,28 +2310,24 @@ bool HexagonInstrInfo::isOperandExtended(const MachineInstr &MI, == OperandNum; } - bool HexagonInstrInfo::isPredicatedNew(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; assert(isPredicated(MI)); return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; } - bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; assert(isPredicated(Opcode)); return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; } - bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return !((F >> HexagonII::PredicatedFalsePos) & HexagonII::PredicatedFalseMask); } - bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; // Make sure that the instruction is predicated. 
@@ -2390,19 +2336,16 @@ bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { HexagonII::PredicatedFalseMask); } - bool HexagonInstrInfo::isPredicated(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask; } - bool HexagonInstrInfo::isPredicateLate(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return ~(F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask; } - bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; assert(get(Opcode).isBranch() && @@ -2410,7 +2353,6 @@ bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const { return (F >> HexagonII::TakenPos) & HexagonII::TakenMask; } - bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr &MI) const { return MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4 || MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT || @@ -2496,13 +2438,11 @@ bool HexagonInstrInfo::isSignExtendingLoad(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isSolo(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::SoloPos) & HexagonII::SoloMask; } - bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::STriw_pred : @@ -2513,7 +2453,6 @@ bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isTailCall(const MachineInstr &MI) const { if (!MI.isBranch()) return false; @@ -2524,7 +2463,6 @@ bool HexagonInstrInfo::isTailCall(const MachineInstr &MI) const { return false; } - // Returns true when SU has a timing class TC1. bool HexagonInstrInfo::isTC1(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); @@ -2544,7 +2482,6 @@ bool HexagonInstrInfo::isTC1(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isTC2(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); switch (SchedClass) { @@ -2561,7 +2498,6 @@ bool HexagonInstrInfo::isTC2(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isTC2Early(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); switch (SchedClass) { @@ -2582,13 +2518,11 @@ bool HexagonInstrInfo::isTC2Early(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isTC4x(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); return SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23; } - // Schedule this ASAP. bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr &MI1, const MachineInstr &MI2) const { @@ -2608,13 +2542,11 @@ bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr &MI1, return false; } - bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr &MI) const { const uint64_t V = getType(MI); return HexagonII::TypeCVI_FIRST <= V && V <= HexagonII::TypeCVI_LAST; } - // Check if the Offset is a valid auto-inc imm by Load/Store Type. 
// bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const { @@ -2653,7 +2585,6 @@ bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const { llvm_unreachable("Not an auto-inc opc!"); } - bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, bool Extend) const { // This function is to check whether the "Offset" is in the correct range of @@ -2808,12 +2739,10 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, "Please define it in the above switch statement!"); } - bool HexagonInstrInfo::isVecAcc(const MachineInstr &MI) const { return isV60VectorInstruction(MI) && isAccumulator(MI); } - bool HexagonInstrInfo::isVecALU(const MachineInstr &MI) const { const uint64_t F = get(MI.getOpcode()).TSFlags; const uint64_t V = ((F >> HexagonII::TypePos) & HexagonII::TypeMask); @@ -2822,7 +2751,6 @@ bool HexagonInstrInfo::isVecALU(const MachineInstr &MI) const { V == HexagonII::TypeCVI_VA_DV; } - bool HexagonInstrInfo::isVecUsableNextPacket(const MachineInstr &ProdMI, const MachineInstr &ConsMI) const { if (EnableACCForwarding && isVecAcc(ProdMI) && isVecAcc(ConsMI)) @@ -2915,7 +2843,6 @@ bool HexagonInstrInfo::isZeroExtendingLoad(const MachineInstr &MI) const { } } - // Add latency to instruction. bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1, const MachineInstr &MI2) const { @@ -2925,7 +2852,6 @@ bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1, return false; } - /// \brief Get the base register and byte offset of a load/store instr. bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI) @@ -2937,7 +2863,6 @@ bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, return BaseReg != 0; } - /// \brief Can these instructions execute at the same time in a bundle. bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First, const MachineInstr &Second) const { @@ -2959,13 +2884,11 @@ bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First, return false; } - bool HexagonInstrInfo::doesNotReturn(const MachineInstr &CallMI) const { unsigned Opc = CallMI.getOpcode(); return Opc == Hexagon::PS_call_nr || Opc == Hexagon::PS_callr_nr; } - bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const { for (auto &I : *B) if (I.isEHLabel()) @@ -2973,7 +2896,6 @@ bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const { return false; } - // Returns true if an instruction can be converted into a non-extended // equivalent instruction. bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr &MI) const { @@ -3011,13 +2933,11 @@ bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::hasPseudoInstrPair(const MachineInstr &MI) const { return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Pseudo) >= 0; } - bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B) const { MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end(); @@ -3029,7 +2949,6 @@ bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B) return false; } - // Returns true, if a LD insn can be promoted to a cur load. 
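isValidAutoIncImm above accepts only post-increment offsets that are whole multiples of the access size and that fit the instruction's small signed increment field. A sketch of that check with illustrative field widths rather than the exact per-type Hexagon tables:

```cpp
#include <cassert>

static bool validAutoInc(int Offset, int AccessBytes, int FieldBits) {
  if (Offset % AccessBytes != 0)
    return false; // must step by whole elements
  int Steps = Offset / AccessBytes;
  int Lo = -(1 << (FieldBits - 1));
  int Hi = (1 << (FieldBits - 1)) - 1;
  return Steps >= Lo && Steps <= Hi; // must fit the signed increment field
}

int main() {
  assert(validAutoInc(8, 4, 4));  // two 4-byte elements, fits 4 signed bits
  assert(!validAutoInc(6, 4, 4)); // not a multiple of the access size
}
```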
bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr &MI) const { auto &HST = MI.getParent()->getParent()->getSubtarget(); @@ -3038,14 +2957,12 @@ bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr &MI) const { HST.hasV60TOps(); } - // Returns true, if a ST insn can be promoted to a new-value store. bool HexagonInstrInfo::mayBeNewStore(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::mayNVStorePos) & HexagonII::mayNVStoreMask; } - bool HexagonInstrInfo::producesStall(const MachineInstr &ProdMI, const MachineInstr &ConsMI) const { // There is no stall when ProdMI is not a V60 vector. @@ -3064,7 +2981,6 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &ProdMI, return true; } - bool HexagonInstrInfo::producesStall(const MachineInstr &MI, MachineBasicBlock::const_instr_iterator BII) const { // There is no stall when I is not a V60 vector. @@ -3091,7 +3007,6 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &MI, return false; } - bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI, unsigned PredReg) const { for (unsigned opNum = 0; opNum < MI.getNumOperands(); opNum++) { @@ -3106,7 +3021,6 @@ bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI, return MI.getOpcode() != Hexagon::A4_tlbmatch; } - bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const { return (Opcode == Hexagon::J2_jumpt) || (Opcode == Hexagon::J2_jumpf) || @@ -3116,25 +3030,21 @@ bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const { (Opcode == Hexagon::J2_jumpfnewpt); } - bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef Cond) const { if (Cond.empty() || !isPredicated(Cond[0].getImm())) return false; return !isPredicatedTrue(Cond[0].getImm()); } - short HexagonInstrInfo::getAbsoluteForm(const MachineInstr &MI) const { return Hexagon::getAbsoluteForm(MI.getOpcode()); } - unsigned HexagonInstrInfo::getAddrMode(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask; } - // Returns the base register in a memory access (load/store). The offset is // returned in Offset and the access size is returned in AccessSize. unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI, @@ -3171,7 +3081,6 @@ unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI, return MI.getOperand(basePos).getReg(); } - /// Return the position of the base and offset operands for this instruction. bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const { @@ -3203,7 +3112,6 @@ bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr &MI, return true; } - // Inserts branching instructions in reverse order of their occurrence. // e.g. jump_t t1 (i1) // jump t2 (i2) @@ -3265,24 +3173,20 @@ SmallVector HexagonInstrInfo::getBranchingInstrs( return Jumpers; } - short HexagonInstrInfo::getBaseWithLongOffset(short Opcode) const { if (Opcode < 0) return -1; return Hexagon::getBaseWithLongOffset(Opcode); } - short HexagonInstrInfo::getBaseWithLongOffset(const MachineInstr &MI) const { return Hexagon::getBaseWithLongOffset(MI.getOpcode()); } - short HexagonInstrInfo::getBaseWithRegOffset(const MachineInstr &MI) const { return Hexagon::getBaseWithRegOffset(MI.getOpcode()); } - // Returns Operand Index for the constant extended instruction. 
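getAddrMode above, like dozens of the predicates in this file, is a one-line probe of the packed TSFlags descriptor: shift the field into place, then mask it out. A standalone model of the idiom with invented field positions (the real positions live in HexagonBaseInfo.h):

```cpp
#include <cassert>
#include <cstdint>

constexpr unsigned PredicatedPos = 3, PredicatedMask = 0x1;
constexpr unsigned AddrModePos = 4, AddrModeMask = 0x7;

static bool isPredicatedFlag(uint64_t Flags) {
  return (Flags >> PredicatedPos) & PredicatedMask;
}

static unsigned addrModeField(uint64_t Flags) {
  return (Flags >> AddrModePos) & AddrModeMask;
}

int main() {
  uint64_t F = (1u << PredicatedPos) | (5u << AddrModePos); // pack two fields
  assert(isPredicatedFlag(F));
  assert(addrModeField(F) == 5); // each probe recovers its own field
}
```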
unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; @@ -3379,7 +3283,6 @@ HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup( return HexagonII::HCG_None; } - // Returns -1 when there is no opcode found. unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA, const MachineInstr &GB) const { @@ -3398,7 +3301,6 @@ unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA, return -1; } - int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { enum Hexagon::PredSense inPredSense; inPredSense = invertPredicate ? Hexagon::PredSense_false : @@ -3410,7 +3312,6 @@ int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { llvm_unreachable("Unexpected predicable instruction"); } - // Return the cur value instruction for a given store. int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { switch (MI.getOpcode()) { @@ -3428,8 +3329,6 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { return 0; } - - // The diagram below shows the steps involved in the conversion of a predicated // store instruction to its .new predicated new-value form. // @@ -3509,7 +3408,6 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { // promoted. Therefore, in case of dependence check failure (due to R5) during // next iteration, it should be converted back to its most basic form. - // Return the new value instruction for a given store. int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const { int NVOpcode = Hexagon::getNewValueOpcode(MI.getOpcode()); @@ -3552,7 +3450,6 @@ int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const { return 0; } - // Returns the opcode to use when converting MI, which is a conditional jump, // into a conditional instruction which uses the .new value of the predicate. // We also use branch probabilities to add a hint to the jump. @@ -3579,7 +3476,6 @@ int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI, } } - // Return .new predicate version for an instruction. int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI, const MachineBranchProbabilityInfo *MBPI) const { @@ -3599,7 +3495,6 @@ int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI, return 0; } - int HexagonInstrInfo::getDotOldOp(const int opc) const { int NewOp = opc; if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form @@ -3615,7 +3510,6 @@ int HexagonInstrInfo::getDotOldOp(const int opc) const { return NewOp; } - // See if instruction could potentially be a duplex candidate. // If so, return its group. Zero otherwise. HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( @@ -3960,12 +3854,10 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_None; } - short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr &MI) const { return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Real); } - // Return first non-debug instruction in the basic block. MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB) const { @@ -3978,7 +3870,6 @@ MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB) return nullptr; } - unsigned HexagonInstrInfo::getInstrTimingClassLatency( const InstrItineraryData *ItinData, const MachineInstr &MI) const { // Default to one cycle for no itinerary. 
However, an "empty" itinerary may @@ -4000,7 +3891,6 @@ unsigned HexagonInstrInfo::getInstrTimingClassLatency( return Latency; } - // inverts the predication logic. // p -> NotP // NotP -> P @@ -4013,7 +3903,6 @@ bool HexagonInstrInfo::getInvertedPredSense( return true; } - unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { int InvPredOpcode; InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc) @@ -4024,7 +3913,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { llvm_unreachable("Unexpected predicated instruction"); } - // Returns the max value that doesn't need to be extended. int HexagonInstrInfo::getMaxValue(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; @@ -4039,13 +3927,11 @@ int HexagonInstrInfo::getMaxValue(const MachineInstr &MI) const { return ~(-1U << bits); } - unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::MemAccessSizePos) & HexagonII::MemAccesSizeMask; } - // Returns the min value that doesn't need to be extended. int HexagonInstrInfo::getMinValue(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; @@ -4060,7 +3946,6 @@ int HexagonInstrInfo::getMinValue(const MachineInstr &MI) const { return 0; } - // Returns opcode of the non-extended equivalent instruction. short HexagonInstrInfo::getNonExtOpcode(const MachineInstr &MI) const { // Check if the instruction has a register form that uses register in place @@ -4086,7 +3971,6 @@ short HexagonInstrInfo::getNonExtOpcode(const MachineInstr &MI) const { return -1; } - bool HexagonInstrInfo::getPredReg(ArrayRef Cond, unsigned &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const { if (Cond.empty()) @@ -4107,17 +3991,14 @@ bool HexagonInstrInfo::getPredReg(ArrayRef Cond, return true; } - short HexagonInstrInfo::getPseudoInstrPair(const MachineInstr &MI) const { return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Pseudo); } - short HexagonInstrInfo::getRegForm(const MachineInstr &MI) const { return Hexagon::getRegForm(MI.getOpcode()); } - // Return the number of bytes required to encode the instruction. // Hexagon instructions are fixed length, 4 bytes, unless they // use a constant extender, which requires another 4 bytes. @@ -4156,13 +4037,11 @@ unsigned HexagonInstrInfo::getSize(const MachineInstr &MI) const { return Size; } - uint64_t HexagonInstrInfo::getType(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::TypePos) & HexagonII::TypeMask; } - unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const { const TargetSubtargetInfo &ST = MI.getParent()->getParent()->getSubtarget(); const InstrItineraryData &II = *ST.getInstrItineraryData(); @@ -4171,19 +4050,16 @@ unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const { return IS.getUnits(); } - unsigned HexagonInstrInfo::getValidSubTargets(const unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask; } - // Calculate size of the basic block without debug instructions. 
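getMaxValue/getMinValue above derive an immediate's encodable range from its bit width: ~(-1U << bits) for an unsigned field, and [-(2^(bits-1)), 2^(bits-1)-1] for a signed one. Spot-checking the arithmetic:

```cpp
#include <cassert>

int main() {
  unsigned bits = 6;
  assert(~(-1U << bits) == 63u);       // unsigned max for a 6-bit field
  assert((1 << (bits - 1)) - 1 == 31); // signed max
  assert(-(1 << (bits - 1)) == -32);   // signed min
}
```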
unsigned HexagonInstrInfo::nonDbgBBSize(const MachineBasicBlock *BB) const { return nonDbgMICount(BB->instr_begin(), BB->instr_end()); } - unsigned HexagonInstrInfo::nonDbgBundleSize( MachineBasicBlock::const_iterator BundleHead) const { assert(BundleHead->isBundle() && "Not a bundle header"); @@ -4192,7 +4068,6 @@ unsigned HexagonInstrInfo::nonDbgBundleSize( return nonDbgMICount(++MII, getBundleEnd(BundleHead.getInstrIterator())); } - /// immediateExtend - Changes the instruction in place to one using an immediate /// extender. void HexagonInstrInfo::immediateExtend(MachineInstr &MI) const { @@ -4208,7 +4083,6 @@ void HexagonInstrInfo::immediateExtend(MachineInstr &MI) const { MO.addTargetFlag(HexagonII::HMOTF_ConstExtended); } - bool HexagonInstrInfo::invertAndChangeJumpTarget( MachineInstr &MI, MachineBasicBlock *NewTarget) const { DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to BB#" @@ -4229,7 +4103,6 @@ bool HexagonInstrInfo::invertAndChangeJumpTarget( return true; } - void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const { /* +++ The code below is used to generate complete set of Hexagon Insn +++ */ MachineFunction::iterator A = MF.begin(); @@ -4248,7 +4121,6 @@ void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const { /* --- The code above is used to generate complete set of Hexagon Insn --- */ } - // inverts the predication logic. // p -> NotP // NotP -> P @@ -4258,7 +4130,6 @@ bool HexagonInstrInfo::reversePredSense(MachineInstr &MI) const { return true; } - // Reverse the branch prediction. unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const { int PredRevOpcode = -1; @@ -4270,14 +4141,12 @@ unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const { return PredRevOpcode; } - // TODO: Add more rigorous validation. bool HexagonInstrInfo::validateBranchCond(const ArrayRef &Cond) const { return Cond.empty() || (Cond[0].isImm() && (Cond.size() != 1)); } - short HexagonInstrInfo::xformRegToImmOffset(const MachineInstr &MI) const { return Hexagon::xformRegToImmOffset(MI.getOpcode()); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index 2d184d1484e9..2358d4b7e4c0 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -16,9 +16,14 @@ #include "HexagonRegisterInfo.h" #include "MCTargetDesc/HexagonBaseInfo.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/Target/TargetInstrInfo.h" +#include +#include #define GET_INSTRINFO_HEADER #include "HexagonGenInstrInfo.inc" @@ -29,9 +34,10 @@ struct EVT; class HexagonSubtarget; class HexagonInstrInfo : public HexagonGenInstrInfo { - virtual void anchor(); const HexagonRegisterInfo RI; + virtual void anchor(); + public: explicit HexagonInstrInfo(HexagonSubtarget &ST); @@ -260,7 +266,7 @@ public: /// PredCost. unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, - unsigned *PredCost = 0) const override; + unsigned *PredCost = nullptr) const override; /// Create machine specific model for scheduling. 
DFAPacketizer * @@ -378,7 +384,6 @@ public: bool PredOpcodeHasJMP_c(unsigned Opcode) const; bool predOpcodeHasNot(ArrayRef Cond) const; - short getAbsoluteForm(const MachineInstr &MI) const; unsigned getAddrMode(const MachineInstr &MI) const; unsigned getBaseAndOffset(const MachineInstr &MI, int &Offset, @@ -421,13 +426,11 @@ public: unsigned getUnits(const MachineInstr &MI) const; unsigned getValidSubTargets(const unsigned Opcode) const; - /// getInstrTimingClassLatency - Compute the instruction latency of a given /// instruction using Timing Class information, if available. unsigned nonDbgBBSize(const MachineBasicBlock *BB) const; unsigned nonDbgBundleSize(MachineBasicBlock::const_iterator BundleHead) const; - void immediateExtend(MachineInstr &MI) const; bool invertAndChangeJumpTarget(MachineInstr &MI, MachineBasicBlock* NewTarget) const; @@ -438,6 +441,6 @@ public: short xformRegToImmOffset(const MachineInstr &MI) const; }; -} +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h index 371b52108b9b..d83bcbc41553 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h @@ -15,33 +15,31 @@ namespace llvm { - namespace Hexagon { +namespace Hexagon { + const unsigned int StartPacket = 0x1; const unsigned int EndPacket = 0x2; - } +} // end namespace Hexagon /// Hexagon target-specific information for each MachineFunction. class HexagonMachineFunctionInfo : public MachineFunctionInfo { // SRetReturnReg - Some subtargets require that sret lowering includes // returning the value of the returned struct in a register. This field // holds the virtual register into which the sret argument is passed. 
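// Background for the field below (illustrative only, not from this patch):
// "sret" lowering demotes an aggregate return into a hidden out-parameter,
// conceptually turning
//
//   struct S f(void);          // source-level signature
// into
//   void f(struct S *ret);     // callee writes the result through 'ret'
//
// and some ABIs additionally require the callee to hand that pointer back in
// a register. SRetReturnReg below caches the virtual register holding it.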
- unsigned SRetReturnReg; - unsigned StackAlignBaseVReg; // Aligned-stack base register (virtual) - unsigned StackAlignBasePhysReg; // (physical) + unsigned SRetReturnReg = 0; + unsigned StackAlignBaseVReg = 0; // Aligned-stack base register (virtual) + unsigned StackAlignBasePhysReg = 0; // (physical) int VarArgsFrameIndex; - bool HasClobberLR; - bool HasEHReturn; + bool HasClobberLR = false; + bool HasEHReturn = false; std::map PacketInfo; virtual void anchor(); public: - HexagonMachineFunctionInfo() : SRetReturnReg(0), StackAlignBaseVReg(0), - StackAlignBasePhysReg(0), HasClobberLR(0), HasEHReturn(false) {} + HexagonMachineFunctionInfo() = default; - HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0), - StackAlignBaseVReg(0), StackAlignBasePhysReg(0), HasClobberLR(0), - HasEHReturn(false) {} + HexagonMachineFunctionInfo(MachineFunction &MF) {} unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } @@ -75,6 +73,7 @@ public: void setStackAlignBasePhysReg(unsigned R) { StackAlignBasePhysReg = R; } unsigned getStackAlignBasePhysReg() const { return StackAlignBasePhysReg; } }; -} // End llvm namespace -#endif +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index e902f600e881..c9c4f95dbaaa 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -10,17 +10,27 @@ // This file contains the declarations of the HexagonTargetAsmInfo properties. // //===----------------------------------------------------------------------===// + #define DEBUG_TYPE "hexagon-sdata" -#include "HexagonTargetMachine.h" #include "HexagonTargetObjectFile.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Type.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/SectionKind.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -44,13 +54,21 @@ static cl::opt TraceGVPlacement("trace-gv-placement", // (e.g. 
-debug and -debug-only=globallayout)
 #define TRACE_TO(s, X) s << X
 #ifdef NDEBUG
-#define TRACE(X) do { if (TraceGVPlacement) { TRACE_TO(errs(), X); } } while (0)
+#define TRACE(X) \
+  do { \
+    if (TraceGVPlacement) { \
+      TRACE_TO(errs(), X); \
+    } \
+  } while (false)
 #else
-#define TRACE(X) \
-  do { \
-    if (TraceGVPlacement) { TRACE_TO(errs(), X); } \
-    else { DEBUG( TRACE_TO(dbgs(), X) ); } \
-  } while (0)
+#define TRACE(X) \
+  do { \
+    if (TraceGVPlacement) { \
+      TRACE_TO(errs(), X); \
+    } else { \
+      DEBUG(TRACE_TO(dbgs(), X)); \
+    } \
+  } while (false)
 #endif

 // Returns true if the section name is such that the symbol will be put
@@ -69,7 +87,6 @@ static bool isSmallDataSection(StringRef Sec) {
          Sec.find(".scommon.") != StringRef::npos;
 }

-
 static const char *getSectionSuffixForSize(unsigned Size) {
   switch (Size) {
   default:
@@ -163,7 +180,6 @@ MCSection *HexagonTargetObjectFile::getExplicitSectionGlobal(
   return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, Kind, TM);
 }

-
 /// Return true if this global value should be placed into small data/bss
 /// section.
 bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
@@ -232,17 +248,14 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
   return true;
 }

-
 bool HexagonTargetObjectFile::isSmallDataEnabled() const {
   return SmallDataThreshold > 0;
 }

-
 unsigned HexagonTargetObjectFile::getSmallDataSize() const {
   return SmallDataThreshold;
 }

-
 /// Descends any type down to "elementary" components,
 /// discovering the smallest addressable one.
 /// If zero is returned, declaration will not be modified.
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index 5feaffe6efb9..9a09a17767a6 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -1,5 +1,4 @@
-
-//=== HexagonMCCompound.cpp - Hexagon Compound checker -------===//
+//=== HexagonMCCompound.cpp - Hexagon Compound checker -------------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -11,18 +10,17 @@
 // This file looks at a packet and tries to form compound insns
 //
 //===----------------------------------------------------------------------===//
+
 #include "Hexagon.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
-#include "MCTargetDesc/HexagonMCShuffler.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/MC/MCAssembler.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include
+#include

 using namespace llvm;
 using namespace Hexagon;

@@ -79,8 +77,7 @@ static const unsigned cmpgtn1BitOpcode[8] = {
 };

 // enum HexagonII::CompoundGroup
-namespace {
-unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
+static unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
   unsigned DstReg, SrcReg, Src1Reg, Src2Reg;

   switch (MI.getOpcode()) {
@@ -173,11 +170,9 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
   return HexagonII::HCG_None;
 }
-}

 /// getCompoundOp - Return the index from 0-7 into the above opcode lists.
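// Reader's sketch (assumptions flagged, not patch content): the 0-7 index
// spans the eight combinations of predicate form, predicate register (P0/P1),
// and taken/not-taken hint, which is why each opcode table above has exactly
// eight entries. One consistent packing is shown below; the authoritative
// order is the enum whose tp0_jump_t/tp1_jump_t members appear in
// getCompoundOp() just after this.

static unsigned compoundJumpIndex(bool TruePred, bool IsP1, bool Taken) {
  // Hypothetical helper: three independent bits yield indexes 0 through 7.
  return (unsigned(TruePred) << 2) | (unsigned(IsP1) << 1) | unsigned(Taken);
}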
-namespace {
-unsigned getCompoundOp(MCInst const &HMCI) {
+static unsigned getCompoundOp(MCInst const &HMCI) {
   const MCOperand &Predicate = HMCI.getOperand(0);
   unsigned PredReg = Predicate.getReg();
@@ -198,11 +193,10 @@ unsigned getCompoundOp(MCInst const &HMCI) {
     return (PredReg == Hexagon::P0) ? tp0_jump_t : tp1_jump_t;
   }
 }
-}

-namespace {
-MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
-  MCInst *CompoundInsn = 0;
+static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
+                               MCInst const &R) {
+  MCInst *CompoundInsn = nullptr;
   unsigned compoundOpcode;
   MCOperand Rs, Rt;
   int64_t Value;
@@ -336,12 +330,10 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
   return CompoundInsn;
 }
-}

 /// Non-Symmetrical. See if these two instructions are a fit for a compound pair.
-namespace {
-bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
-                           MCInst const &MIb, bool IsExtendedB) {
+static bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
+                                  MCInst const &MIb, bool IsExtendedB) {
   unsigned MIaG = getCompoundCandidateGroup(MIa, IsExtendedA);
   unsigned MIbG = getCompoundCandidateGroup(MIb, IsExtendedB);
   // We have two candidates - check that this is the same register
@@ -353,10 +345,9 @@ bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
   return ((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) &&
           (MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg()));
 }
-}

-namespace {
-bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
+static bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context,
+                            MCInst &MCI) {
   assert(HexagonMCInstrInfo::isBundle(MCI));
   bool JExtended = false;
   for (MCInst::iterator J =
@@ -367,8 +358,7 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
       JExtended = true;
       continue;
     }
-    if (llvm::HexagonMCInstrInfo::getType(MCII, *JumpInst) ==
-        HexagonII::TypeJ) {
+    if (HexagonMCInstrInfo::getType(MCII, *JumpInst) == HexagonII::TypeJ) {
       // Try to pair with another insn (B)undled with jump.
       bool BExtended = false;
       for (MCInst::iterator B =
@@ -401,7 +391,6 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
   }
   return false;
 }
-}

 /// tryCompound - Given a bundle, check for compound insns; when one
 /// is found, update the contents of the bundle with the compound insn.
@@ -420,6 +409,4 @@
   // a compound is found.
while (lookForCompound(MCII, Context, MCI)) ; - - return; } diff --git a/contrib/llvm/lib/Target/Hexagon/RDFCopy.h b/contrib/llvm/lib/Target/Hexagon/RDFCopy.h index 517f17cc9c64..5ece11bd5ce4 100644 --- a/contrib/llvm/lib/Target/Hexagon/RDFCopy.h +++ b/contrib/llvm/lib/Target/Hexagon/RDFCopy.h @@ -1,4 +1,4 @@ -//===--- RDFCopy.h --------------------------------------------------------===// +//===--- RDFCopy.h ----------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,23 +7,26 @@ // //===----------------------------------------------------------------------===// -#ifndef RDF_COPY_H -#define RDF_COPY_H +#ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H +#define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H #include "RDFGraph.h" #include #include namespace llvm { + class MachineBasicBlock; class MachineDominatorTree; class MachineInstr; namespace rdf { + struct CopyPropagation { CopyPropagation(DataFlowGraph &dfg) : MDT(dfg.getDT()), DFG(dfg), Trace(false) {} - virtual ~CopyPropagation() {} + + virtual ~CopyPropagation() = default; bool run(); void trace(bool On) { Trace = On; } @@ -49,7 +52,9 @@ namespace rdf { void updateMap(NodeAddr IA); bool scanBlock(MachineBasicBlock *B); }; -} // namespace rdf -} // namespace llvm -#endif +} // end namespace rdf + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H diff --git a/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp index 33c3f03790f3..fa272ea1a76a 100644 --- a/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp +++ b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp @@ -10,16 +10,31 @@ // Target-independent, SSA-based data flow graph for register data flow (RDF). // #include "RDFGraph.h" - #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominanceFrontier.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" +#include +#include +#include +#include +#include +#include +#include using namespace llvm; using namespace rdf; @@ -88,14 +103,12 @@ raw_ostream &operator<< (raw_ostream &OS, const Print &P) { return OS; } -namespace { - void printRefHeader(raw_ostream &OS, const NodeAddr RA, - const DataFlowGraph &G) { - OS << Print(RA.Id, G) << '<' - << Print(RA.Addr->getRegRef(G), G) << '>'; - if (RA.Addr->getFlags() & NodeAttrs::Fixed) - OS << '!'; - } +static void printRefHeader(raw_ostream &OS, const NodeAddr RA, + const DataFlowGraph &G) { + OS << Print(RA.Id, G) << '<' + << Print(RA.Addr->getRegRef(G), G) << '>'; + if (RA.Addr->getFlags() & NodeAttrs::Fixed) + OS << '!'; } template<> @@ -183,9 +196,11 @@ raw_ostream &operator<< (raw_ostream &OS, const Print &P) { } namespace { + template struct PrintListV { PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {} + typedef T Type; const NodeList &List; const DataFlowGraph &G; @@ -201,7 +216,8 @@ namespace { } return OS; } -} + +} // end anonymous namespace template<> raw_ostream &operator<< (raw_ostream &OS, const 
Print> &P) {
@@ -219,10 +235,10 @@ raw_ostream &operator<< (raw_ostream &OS,
   // Print the target for calls and branches (for readability).
   if (MI.isCall() || MI.isBranch()) {
     MachineInstr::const_mop_iterator T =
-        find_if(MI.operands(),
-                [] (const MachineOperand &Op) -> bool {
-                  return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
-                });
+        llvm::find_if(MI.operands(),
+                      [] (const MachineOperand &Op) -> bool {
+                        return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
+                      });
     if (T != MI.operands_end()) {
       OS << ' ';
       if (T->isMBB())
@@ -327,8 +343,8 @@ raw_ostream &operator<< (raw_ostream &OS,
   return OS;
 }

-} // namespace rdf
-} // namespace llvm
+} // end namespace rdf
+} // end namespace llvm

 // Node allocation functions.
 //
@@ -390,7 +406,6 @@ void NodeAllocator::clear() {
   ActiveEnd = nullptr;
 }

-
 // Insert node NA after "this" in the circular chain.
 void NodeBase::append(NodeAddr<NodeBase*> NA) {
   NodeId Nx = Next;
@@ -401,7 +416,6 @@ void NodeBase::append(NodeAddr<NodeBase*> NA) {
   }
 }

-
 // Fundamental node manipulator functions.

 // Obtain the register reference from a reference node.
@@ -590,7 +604,6 @@ NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) {
   return findBlock(EntryB, G);
 }

-
 // Target operand information.
 //
@@ -641,7 +654,6 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
   return false;
 }

-
 RegisterRef RegisterAggr::normalize(RegisterRef RR) const {
   RegisterId SuperReg = RR.Reg;
   while (true) {
@@ -745,7 +757,6 @@ void RegisterAggr::print(raw_ostream &OS) const {
   OS << " }";
 }

-
 //
 // The data flow graph construction.
 //
@@ -753,10 +764,9 @@ void RegisterAggr::print(raw_ostream &OS) const {
 DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
       const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
       const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi)
-    : LMI(), MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) {
+    : MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) {
 }

-
 // The implementation of the definition stack.
 // Each register reference has its own definition stack. In particular,
 // register references "Reg" and "Reg:subreg" will each have their
@@ -845,7 +855,6 @@ unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
   return P;
 }

-
 // Register information.

 // Get the list of references aliased to RR. Lane masks are ignored.
@@ -915,7 +924,6 @@ NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) {
   return NA;
 }

-
 // Allocation routines for specific node types/kinds.

 NodeAddr<UseNode*> DataFlowGraph::newUse(NodeAddr<InstrNode*> Owner,
@@ -1248,7 +1256,6 @@ bool DataFlowGraph::alias(RegisterRef RA, RegisterRef RB) const {
   return false;
 }

-
 // Clear all information in the graph.
 void DataFlowGraph::reset() {
   Memory.clear();
@@ -1256,7 +1263,6 @@ void DataFlowGraph::reset() {
   Func = NodeAddr<FuncNode*>();
 }

-
 // Return the next reference node in the instruction node IA that is related
 // to RA.
Conceptually, two reference nodes are related if they refer to the // same instance of a register access, but differ in flags or other minor diff --git a/contrib/llvm/lib/Target/Hexagon/RDFGraph.h b/contrib/llvm/lib/Target/Hexagon/RDFGraph.h index 871062ff2b05..49d78a8b22b5 100644 --- a/contrib/llvm/lib/Target/Hexagon/RDFGraph.h +++ b/contrib/llvm/lib/Target/Hexagon/RDFGraph.h @@ -1,4 +1,4 @@ -//===--- RDFGraph.h -------------------------------------------------------===// +//===--- RDFGraph.h ---------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -221,20 +221,25 @@ // The statement s5 has two use nodes for t0: u7" and u9". The quotation // mark " indicates that the node is a shadow. // -#ifndef RDF_GRAPH_H -#define RDF_GRAPH_H + +#ifndef LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H +#define LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/LaneBitmask.h" #include "llvm/Support/Allocator.h" -#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Timer.h" #include "llvm/Target/TargetRegisterInfo.h" - +#include +#include +#include #include #include #include #include +#include #include // RDF uses uint32_t to refer to registers. This is to ensure that the type @@ -243,6 +248,7 @@ static_assert(sizeof(uint32_t) == sizeof(unsigned), "Those should be equal"); namespace llvm { + class MachineBasicBlock; class MachineFunction; class MachineInstr; @@ -252,6 +258,7 @@ namespace llvm { class TargetInstrInfo; namespace rdf { + typedef uint32_t NodeId; typedef uint32_t RegisterId; @@ -293,9 +300,11 @@ namespace rdf { static uint16_t set_type(uint16_t A, uint16_t T) { return (A & ~TypeMask) | T; } + static uint16_t set_kind(uint16_t A, uint16_t K) { return (A & ~KindMask) | K; } + static uint16_t set_flags(uint16_t A, uint16_t F) { return (A & ~FlagMask) | F; } @@ -326,9 +335,14 @@ namespace rdf { }; template struct NodeAddr { - NodeAddr() : Addr(nullptr), Id(0) {} + NodeAddr() : Addr(nullptr) {} NodeAddr(T A, NodeId I) : Addr(A), Id(I) {} + // Type cast (casting constructor). The reason for having this class + // instead of std::pair. + template NodeAddr(const NodeAddr &NA) + : Addr(static_cast(NA.Addr)), Id(NA.Id) {} + bool operator== (const NodeAddr &NA) const { assert((Addr == NA.Addr) == (Id == NA.Id)); return Addr == NA.Addr; @@ -336,13 +350,9 @@ namespace rdf { bool operator!= (const NodeAddr &NA) const { return !operator==(NA); } - // Type cast (casting constructor). The reason for having this class - // instead of std::pair. - template NodeAddr(const NodeAddr &NA) - : Addr(static_cast(NA.Addr)), Id(NA.Id) {} T Addr; - NodeId Id; + NodeId Id = 0; }; struct NodeBase; @@ -366,17 +376,20 @@ namespace rdf { struct NodeAllocator { // Amount of storage for a single node. 
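// An aside on the allocator that begins here (a sketch under the stated
// power-of-two NodesPerBlock assumption, not patch content): a NodeId packs a
// block number in its high bits and an in-block index in its low BitsPerIndex
// bits, offset by 1 so that 0 can serve as a null id:
//
//   NodeId id = ((Block << BitsPerIndex) | Index) + 1;  // see makeId() below
//   Block     = (id - 1) >> BitsPerIndex;               // see ptr()
//   Offset    = ((id - 1) & IndexMask) * NodeMemSize;
//
// NodeMemSize below is the raw per-node storage that this scheme indexes.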
enum { NodeMemSize = 32 }; + NodeAllocator(uint32_t NPB = 4096) : NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)), - IndexMask((1 << BitsPerIndex)-1), ActiveEnd(nullptr) { + IndexMask((1 << BitsPerIndex)-1) { assert(isPowerOf2_32(NPB)); } + NodeBase *ptr(NodeId N) const { uint32_t N1 = N-1; uint32_t BlockN = N1 >> BitsPerIndex; uint32_t Offset = (N1 & IndexMask) * NodeMemSize; return reinterpret_cast(Blocks[BlockN]+Offset); } + NodeId id(const NodeBase *P) const; NodeAddr New(); void clear(); @@ -384,6 +397,7 @@ namespace rdf { private: void startNewBlock(); bool needNewBlock(); + uint32_t makeId(uint32_t Block, uint32_t Index) const { // Add 1 to the id, to avoid the id of 0, which is treated as "null". return ((Block << BitsPerIndex) | Index) + 1; @@ -392,7 +406,7 @@ namespace rdf { const uint32_t NodesPerBlock; const uint32_t BitsPerIndex; const uint32_t IndexMask; - char *ActiveEnd; + char *ActiveEnd = nullptr; std::vector Blocks; typedef BumpPtrAllocatorImpl AllocatorTy; AllocatorTy MemPool; @@ -405,6 +419,7 @@ namespace rdf { RegisterRef() : RegisterRef(0) {} explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll()) : Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {} + operator bool() const { return Reg != 0 && Mask.any(); } bool operator== (const RegisterRef &RR) const { return Reg == RR.Reg && Mask == RR.Mask; @@ -420,7 +435,8 @@ namespace rdf { struct TargetOperandInfo { TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {} - virtual ~TargetOperandInfo() {} + virtual ~TargetOperandInfo() = default; + virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const; virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const; virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const; @@ -428,7 +444,6 @@ namespace rdf { const TargetInstrInfo &TII; }; - // Packed register reference. Only used for storage. struct PackedRegisterRef { RegisterId Reg; @@ -442,11 +457,13 @@ namespace rdf { template struct IndexedSet { IndexedSet() : Map() { Map.reserve(N); } + T get(uint32_t Idx) const { // Index Idx corresponds to Map[Idx-1]. assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size()); return Map[Idx-1]; } + uint32_t insert(T Val) { // Linear search. auto F = llvm::find(Map, Val); @@ -455,11 +472,13 @@ namespace rdf { Map.push_back(Val); return Map.size(); // Return actual_index + 1. } + uint32_t find(T Val) const { auto F = llvm::find(Map, Val); assert(F != Map.end()); return F - Map.begin(); } + private: std::vector Map; }; @@ -478,12 +497,14 @@ namespace rdf { assert(LM.any()); return LM.all() ? 
0 : find(LM); } + PackedRegisterRef pack(RegisterRef RR) { return { RR.Reg, getIndexForLaneMask(RR.Mask) }; } PackedRegisterRef pack(RegisterRef RR) const { return { RR.Reg, getIndexForLaneMask(RR.Mask) }; } + RegisterRef unpack(PackedRegisterRef PR) const { return RegisterRef(PR.Reg, getLaneMaskForIndex(PR.MaskId)); } @@ -491,11 +512,8 @@ namespace rdf { struct RegisterAggr { RegisterAggr(const TargetRegisterInfo &tri) - : Masks(), ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false), - TRI(tri) {} - RegisterAggr(const RegisterAggr &RG) - : Masks(RG.Masks), ExpAliasUnits(RG.ExpAliasUnits), - CheckUnits(RG.CheckUnits), TRI(RG.TRI) {} + : ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false), TRI(tri) {} + RegisterAggr(const RegisterAggr &RG) = default; bool empty() const { return Masks.empty(); } bool hasAliasOf(RegisterRef RR) const; @@ -530,11 +548,11 @@ namespace rdf { const TargetRegisterInfo &TRI; }; - struct NodeBase { public: // Make sure this is a POD. NodeBase() = default; + uint16_t getType() const { return NodeAttrs::type(Attrs); } uint16_t getKind() const { return NodeAttrs::kind(Attrs); } uint16_t getFlags() const { return NodeAttrs::flags(Attrs); } @@ -596,29 +614,36 @@ namespace rdf { struct RefNode : public NodeBase { RefNode() = default; + RegisterRef getRegRef(const DataFlowGraph &G) const; + MachineOperand &getOp() { assert(!(getFlags() & NodeAttrs::PhiRef)); return *Ref.Op; } + void setRegRef(RegisterRef RR, DataFlowGraph &G); void setRegRef(MachineOperand *Op, DataFlowGraph &G); + NodeId getReachingDef() const { return Ref.RD; } void setReachingDef(NodeId RD) { Ref.RD = RD; } + NodeId getSibling() const { return Ref.Sib; } void setSibling(NodeId Sib) { Ref.Sib = Sib; } + bool isUse() const { assert(getType() == NodeAttrs::Ref); return getKind() == NodeAttrs::Use; } + bool isDef() const { assert(getType() == NodeAttrs::Ref); return getKind() == NodeAttrs::Def; @@ -702,6 +727,7 @@ namespace rdf { MachineBasicBlock *getCode() const { return CodeNode::getCode(); } + void addPhi(NodeAddr PA, const DataFlowGraph &G); }; @@ -709,6 +735,7 @@ namespace rdf { MachineFunction *getCode() const { return CodeNode::getCode(); } + NodeAddr findBlock(const MachineBasicBlock *BB, const DataFlowGraph &G) const; NodeAddr getEntryBlock(const DataFlowGraph &G); @@ -723,6 +750,7 @@ namespace rdf { template T ptr(NodeId N) const { return static_cast(ptr(N)); } + NodeId id(const NodeBase *P) const; template NodeAddr addr(NodeId N) const { @@ -738,13 +766,17 @@ namespace rdf { struct DefStack { DefStack() = default; + bool empty() const { return Stack.empty() || top() == bottom(); } + private: typedef NodeAddr value_type; struct Iterator { typedef DefStack::value_type value_type; + Iterator &up() { Pos = DS.nextUp(Pos); return *this; } Iterator &down() { Pos = DS.nextDown(Pos); return *this; } + value_type operator*() const { assert(Pos >= 1); return DS.Stack[Pos-1]; @@ -755,14 +787,17 @@ namespace rdf { } bool operator==(const Iterator &It) const { return Pos == It.Pos; } bool operator!=(const Iterator &It) const { return Pos != It.Pos; } + private: Iterator(const DefStack &S, bool Top); + // Pos-1 is the index in the StorageType object that corresponds to // the top of the DefStack. 
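// Illustration of the position convention (an assumed example, not from the
// source): with Stack = { D1, <delimiter>, D2 }, top() holds Pos == 3 and
// dereferences Stack[2] (D2); stepping down skips delimiter slots. A minimal
// standalone sketch of the same "null entry marks a block boundary" idea:

#include <vector>

template <typename Def>
unsigned topPosition(const std::vector<Def *> &Stack) {
  unsigned Pos = Stack.size();
  while (Pos > 0 && Stack[Pos - 1] == nullptr)
    --Pos; // skip block delimiters, as Iterator::nextDown() does
  return Pos; // Pos - 1 indexes the top element; Pos == 0 means empty
}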
const DefStack &DS; unsigned Pos; friend struct DefStack; }; + public: typedef Iterator iterator; iterator top() const { return Iterator(*this, true); } @@ -773,14 +808,18 @@ namespace rdf { void pop(); void start_block(NodeId N); void clear_block(NodeId N); + private: friend struct Iterator; typedef std::vector StorageType; + bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const { return (P.Addr == nullptr) && (N == 0 || P.Id == N); } + unsigned nextUp(unsigned P) const; unsigned nextDown(unsigned P) const; + StorageType Stack; }; @@ -819,6 +858,7 @@ namespace rdf { if (RemoveFromOwner) removeFromOwner(UA); } + void unlinkDef(NodeAddr DA, bool RemoveFromOwner) { unlinkDefDF(DA); if (RemoveFromOwner) @@ -831,23 +871,28 @@ namespace rdf { return BA.Addr->getType() == NodeAttrs::Ref && BA.Addr->getKind() == Kind; } + template static bool IsCode(const NodeAddr BA) { return BA.Addr->getType() == NodeAttrs::Code && BA.Addr->getKind() == Kind; } + static bool IsDef(const NodeAddr BA) { return BA.Addr->getType() == NodeAttrs::Ref && BA.Addr->getKind() == NodeAttrs::Def; } + static bool IsUse(const NodeAddr BA) { return BA.Addr->getType() == NodeAttrs::Ref && BA.Addr->getKind() == NodeAttrs::Use; } + static bool IsPhi(const NodeAddr BA) { return BA.Addr->getType() == NodeAttrs::Code && BA.Addr->getKind() == NodeAttrs::Phi; } + static bool IsPreservingDef(const NodeAddr DA) { uint16_t Flags = DA.Addr->getFlags(); return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef); @@ -902,6 +947,7 @@ namespace rdf { void unlinkUseDF(NodeAddr UA); void unlinkDefDF(NodeAddr DA); + void removeFromOwner(NodeAddr RA) { NodeAddr IA = RA.Addr->getOwner(*this); IA.Addr->removeMember(RA, *this); @@ -967,7 +1013,6 @@ namespace rdf { return MM; } - // Optionally print the lane mask, if it is not ~0. 
struct PrintLaneMaskOpt {
   PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {}
@@ -991,7 +1036,9 @@ namespace rdf {
     PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g)
       : Print<NodeAddr<T>>(x, g) {}
   };
-} // namespace rdf
-} // namespace llvm
-#endif // RDF_GRAPH_H
+} // end namespace rdf
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 6f0fdddd7d55..92d3c001df94 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -28,6 +28,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Type.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -43,6 +44,11 @@ bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
   return MipsDAGToDAGISel::runOnMachineFunction(MF);
 }

+void MipsSEDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<DominatorTreeWrapperPass>();
+  SelectionDAGISel::getAnalysisUsage(AU);
+}
+
 void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
                                                MachineFunction &MF) {
   MachineInstrBuilder MIB(MF, &MI);
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 2a8e5877e848..f89a350cab04 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -28,6 +28,8 @@ private:

   bool runOnMachineFunction(MachineFunction &MF) override;

+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
   void addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
                              MachineFunction &MF);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index aa3ffde24b99..2b9195b095e1 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3981,40 +3981,46 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
 static bool isFunctionGlobalAddress(SDValue Callee);

 static bool
-resideInSameModule(SDValue Callee, Reloc::Model RelMod) {
+resideInSameSection(const Function *Caller, SDValue Callee,
+                    const TargetMachine &TM) {
   // If !G, Callee can be an external symbol.
   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
-  if (!G) return false;
-
-  const GlobalValue *GV = G->getGlobal();
-
-  if (GV->isDeclaration()) return false;
-
-  switch(GV->getLinkage()) {
-  default: llvm_unreachable("unknow linkage type");
-  case GlobalValue::AvailableExternallyLinkage:
-  case GlobalValue::ExternalWeakLinkage:
+  if (!G)
     return false;

-  // Callee with weak linkage is allowed if it has hidden or protected
-  // visibility
-  case GlobalValue::LinkOnceAnyLinkage:
-  case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions
-  case GlobalValue::WeakAnyLinkage:
-  case GlobalValue::WeakODRLinkage: // e.g. c++ template instantiation
-    if (GV->hasDefaultVisibility())
-      return false;
+  const GlobalValue *GV = G->getGlobal();
+  if (!GV->isStrongDefinitionForLinker())
+    return false;

-  case GlobalValue::ExternalLinkage:
-  case GlobalValue::InternalLinkage:
-  case GlobalValue::PrivateLinkage:
-    break;
+  // Any explicitly-specified sections and section prefixes must also match.
+  // Also, if we're using -ffunction-sections, then each function is always in
+  // a different section (the same is true for COMDAT functions).
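// In outline (a reader's sketch of the rule the new code below implements;
// the combined predicate is hypothetical, though every call in it appears in
// the patch): two symbols may only be assumed co-located when both are strong
// definitions, explicit sections and section prefixes agree, and nothing
// (-ffunction-sections, COMDATs, or possible interposition) can split or
// redirect them:
//
//   bool coLocated = GV->isStrongDefinitionForLinker() &&
//                    !TM.getFunctionSections() &&
//                    !GV->hasComdat() && !Caller->hasComdat() &&
//                    GV->getSection() == Caller->getSection() &&
//                    TM.shouldAssumeDSOLocal(*Caller->getParent(), GV);
//
// The actual checks follow: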
+  if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
+      GV->getSection() != Caller->getSection())
+    return false;
+  if (const auto *F = dyn_cast<Function>(GV)) {
+    if (F->getSectionPrefix() != Caller->getSectionPrefix())
+      return false;
   }

-  // With '-fPIC', calling default visiblity function need insert 'nop' after
-  // function call, no matter that function resides in same module or not, so
-  // we treat it as in different module.
-  if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility())
+  // If the callee might be interposed, then we can't assume the ultimate call
+  // target will be in the same section. Even in cases where we can assume that
+  // interposition won't happen, in any case where the linker might insert a
+  // stub to allow for interposition, we must generate code as though
+  // interposition might occur. To understand why this matters, consider a
+  // situation where: a -> b -> c where the arrows indicate calls. b and c are
+  // in the same section, but a is in a different module (i.e. has a different
+  // TOC base pointer). If the linker allows for interposition between b and c,
+  // then it will generate a stub for the call edge between b and c which will
+  // save the TOC pointer into the designated stack slot allocated by b. If we
+  // return true here, and therefore allow a tail call between b and c, that
+  // stack slot won't exist and the b -> c stub will end up saving b's TOC base
+  // pointer into the stack slot allocated by a (where the a -> b stub saved
+  // a's TOC base pointer). If we're not considering a tail call, but rather,
+  // whether a nop is needed after the call instruction in b, because the linker
+  // will insert a stub, it might complain about a missing nop if we omit it
+  // (although many don't complain in this case).
+  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
     return false;

   return true;
@@ -4130,11 +4136,11 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
       !isa<ExternalSymbolSDNode>(Callee))
     return false;

-  // Check if Callee resides in the same module, because for now, PPC64 SVR4 ABI
-  // (ELFv1/ELFv2) doesn't allow tail calls to a symbol resides in another
-  // module.
+  // Check if Callee resides in the same section, because for now, PPC64 SVR4
+  // ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides in
+  // another section.
   // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
-  if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel()))
+  if (!resideInSameSection(MF.getFunction(), Callee, getTargetMachine()))
     return false;

   // TCO allows altering callee ABI, so we don't have to check further.
@@ -4592,14 +4598,6 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
   return CallOpc;
 }

-static
-bool isLocalCall(const SDValue &Callee)
-{
-  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
-    return G->getGlobal()->isStrongDefinitionForLinker();
-  return false;
-}
-
 SDValue PPCTargetLowering::LowerCallResult(
     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -4701,6 +4699,7 @@ SDValue PPCTargetLowering::FinishCall(
   // stack frame. If caller and callee belong to the same module (and have the
   // same TOC), the NOP will remain unchanged.
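// As a concrete picture (standard PPC64 ELF behavior, not text from this
// patch): a call that may cross TOC regions compiles to
//
//   bl callee
//   nop                  # placeholder the linker may rewrite
//
// and when the linker routes the call through a TOC-switching stub it
// rewrites the nop into the restore, e.g. under ELFv2 (TOC save slot at
// offset 24; ELFv1 uses 40):
//
//   bl <call stub>
//   ld 2, 24(1)          # reload our TOC pointer from the save slot
//
// This is why CALL becomes CALL_NOP below whenever the callee may end up in a
// different section.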
+  MachineFunction &MF = DAG.getMachineFunction();
   if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
       !isPatchPoint) {
     if (CallOpc == PPCISD::BCTRL) {
@@ -4724,11 +4723,11 @@ SDValue PPCTargetLowering::FinishCall(
       // The address needs to go after the chain input but before the flag (or
       // any other variadic arguments).
       Ops.insert(std::next(Ops.begin()), AddTOC);
-    } else if ((CallOpc == PPCISD::CALL) &&
-               (!isLocalCall(Callee) ||
-                DAG.getTarget().getRelocationModel() == Reloc::PIC_))
+    } else if (CallOpc == PPCISD::CALL &&
+               !resideInSameSection(MF.getFunction(), Callee, DAG.getTarget())) {
       // Otherwise insert NOP for non-local calls.
       CallOpc = PPCISD::CALL_NOP;
+    }
   }

   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
index d42e1187ce64..e1825ca1eda1 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -70,7 +70,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   EmitFunctionBody();

   // Emit the XRay table for this function.
-  EmitXRayTable();
+  emitXRayTable();

   // We didn't modify anything.
   return false;
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
index 1deefe1231ca..cd690442bb9f 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -373,6 +373,10 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
   MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
   MachineBasicBlock::iterator NI =
       doMergeWithPrevious ? nullptr : std::next(MBBI);
+  PI = skipDebugInstructionsBackward(PI, MBB.begin());
+  if (NI != nullptr)
+    NI = skipDebugInstructionsForward(NI, MBB.end());
+
   unsigned Opc = PI->getOpcode();
   int Offset = 0;

@@ -2586,6 +2590,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
   uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
   uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
   I = MBB.erase(I);
+  auto InsertPos = skipDebugInstructionsForward(I, MBB.end());

   if (!reserveCallFrame) {
     // If the stack pointer can be changed after prologue, turn the
@@ -2615,7 +2620,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,

     if (HasDwarfEHHandlers && !isDestroy &&
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
-      BuildCFI(MBB, I, DL,
+      BuildCFI(MBB, InsertPos, DL,
                MCCFIInstruction::createGnuArgsSize(nullptr, Amount));

     if (Amount == 0)
@@ -2629,7 +2634,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     // If this is a callee-pop calling convention, emit a CFA adjust for
     // the amount the callee popped.
     if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
-      BuildCFI(MBB, I, DL,
+      BuildCFI(MBB, InsertPos, DL,
               MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));

     // Add Amount to SP to destroy a frame, or subtract to setup.
@@ -2640,13 +2645,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     // Merge with any previous or following adjustment instruction. Note: the
     // instructions merged with here do not have CFI, so their stack
     // adjustments do not feed into CfaAdjustment.
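// Why the skipDebugInstructions{Backward,Forward} calls introduced above
// matter (reader's note): merge and CFI placement decisions must be identical
// with and without -g, so DBG_VALUEs may never block the peephole. A minimal
// sketch of the forward-skip shape, assuming machine-IR iterators:
//
//   template <typename Iter>
//   Iter skipDebug(Iter It, Iter End) {
//     while (It != End && It->isDebugValue())
//       ++It; // debug values carry no machine state
//     return It;
//   }
//
// With InsertPos computed that way, the SP-update merging below behaves the
// same whether or not debug instructions sit between the adjustments: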
-    StackAdjustment += mergeSPUpdates(MBB, I, true);
-    StackAdjustment += mergeSPUpdates(MBB, I, false);
+    StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
+    StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);

     if (StackAdjustment) {
       if (!(Fn->optForMinSize() &&
-          adjustStackWithPops(MBB, I, DL, StackAdjustment)))
-        BuildStackAdjustment(MBB, I, DL, StackAdjustment,
+          adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
+        BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
                              /*InEpilogue=*/false);
     }
   }
@@ -2662,8 +2667,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     // TODO: When not using precise CFA, we also need to adjust for the
     // InternalAmt here.
     if (CfaAdjustment) {
-      BuildCFI(MBB, I, DL, MCCFIInstruction::createAdjustCfaOffset(
-                               nullptr, CfaAdjustment));
+      BuildCFI(MBB, InsertPos, DL,
+               MCCFIInstruction::createAdjustCfaOffset(nullptr,
+                                                       CfaAdjustment));
     }
   }

diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index b293dfa98f82..fd2189397279 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -11474,6 +11474,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
                                         const SmallBitVector &Zeroable,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
+  SmallVector<int, 4> WidenedMask;
+  if (!canWidenShuffleElements(Mask, WidenedMask))
+    return SDValue();
+
   // TODO: If minimizing size and one of the inputs is a zero vector and the
   // zero vector has only one use, we could use a VPERM2X128 to save the
   // instruction bytes needed to explicitly generate the zero vector.
@@ -11521,15 +11525,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
   // [6] - ignore
   // [7] - zero high half of destination

-  int MaskLO = Mask[0];
-  if (MaskLO == SM_SentinelUndef)
-    MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1];
+  int MaskLO = WidenedMask[0] < 0 ? 0 : WidenedMask[0];
+  int MaskHI = WidenedMask[1] < 0 ? 0 : WidenedMask[1];

-  int MaskHI = Mask[2];
-  if (MaskHI == SM_SentinelUndef)
-    MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3];
-
-  unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4;
+  unsigned PermMask = MaskLO | (MaskHI << 4);

   // If either input is a zero vector, replace it with an undef input.
   // Shuffle mask values < 4 are selecting elements of V1.
@@ -11538,16 +11537,16 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
   // selecting the zero vector and setting the zero mask bit.
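// Reader's sketch of the immediate encoding (mirrors the code above and
// below; the helper name is hypothetical): each widened 128-bit selector is
// already 0..3 (0-1 = halves of V1, 2-3 = halves of V2), the low selector
// occupies imm[1:0], the high selector imm[5:4], and bits 3 and 7 zero the
// corresponding destination half:
//
//   unsigned makePerm2X128Imm(int LoSel, int HiSel, bool ZeroLo, bool ZeroHi) {
//     unsigned Imm = unsigned(LoSel) | (unsigned(HiSel) << 4);
//     if (ZeroLo) Imm = (Imm & 0xf0) | 0x08;
//     if (ZeroHi) Imm = (Imm & 0x0f) | 0x80;
//     return Imm;
//   }
//
// The two fix-ups below apply exactly this for zero-vector operands: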
   if (IsV1Zero) {
     V1 = DAG.getUNDEF(VT);
-    if (MaskLO < 4)
+    if (MaskLO < 2)
       PermMask = (PermMask & 0xf0) | 0x08;
-    if (MaskHI < 4)
+    if (MaskHI < 2)
       PermMask = (PermMask & 0x0f) | 0x80;
   }
   if (IsV2Zero) {
     V2 = DAG.getUNDEF(VT);
-    if (MaskLO >= 4)
+    if (MaskLO >= 2)
       PermMask = (PermMask & 0xf0) | 0x08;
-    if (MaskHI >= 4)
+    if (MaskHI >= 2)
       PermMask = (PermMask & 0x0f) | 0x80;
   }

@@ -12012,11 +12011,9 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
   assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");

-  SmallVector WidenedMask;
-  if (canWidenShuffleElements(Mask, WidenedMask))
-    if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
-                                             Zeroable, Subtarget, DAG))
-      return V;
+  if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
+                                           Zeroable, Subtarget, DAG))
+    return V;

   if (V2.isUndef()) {
     // Check for being able to broadcast a single element.
@@ -12107,11 +12104,9 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
   assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!");

-  SmallVector WidenedMask;
-  if (canWidenShuffleElements(Mask, WidenedMask))
-    if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
-                                             Zeroable, Subtarget, DAG))
-      return V;
+  if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
+                                           Zeroable, Subtarget, DAG))
+    return V;

   if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
                                                 Zeroable, Subtarget, DAG))
@@ -12605,33 +12600,72 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
   if (!canWidenShuffleElements(Mask, WidenedMask))
     return SDValue();

-  SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
-  // Insure elements came from the same Op.
-  int MaxOp1Index = VT.getVectorNumElements()/2 - 1;
-  for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
-    if (WidenedMask[i] == SM_SentinelZero)
-      return SDValue();
-    if (WidenedMask[i] == SM_SentinelUndef)
+  // Check for patterns which can be matched with a single insert of a 256-bit
+  // subvector.
+  bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask,
+                                        {0, 1, 2, 3, 0, 1, 2, 3});
+  if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask,
+                                        {0, 1, 2, 3, 8, 9, 10, 11})) {
+    MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4);
+    SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
+                              DAG.getIntPtrConstant(0, DL));
+    SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
+                              OnlyUsesV1 ? V1 : V2,
+                              DAG.getIntPtrConstant(0, DL));
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
+  }
+
+  assert(WidenedMask.size() == 4);
+
+  // See if this is an insertion of the lower 128-bits of V2 into V1.
+  bool IsInsert = true;
+  int V2Index = -1;
+  for (int i = 0; i < 4; ++i) {
+    assert(WidenedMask[i] >= -1);
+    if (WidenedMask[i] < 0)
       continue;
-    SDValue Op = WidenedMask[i] > MaxOp1Index ? V2 : V1;
-    unsigned OpIndex = (i < Size/2) ? 0 : 1;
+
+    // Make sure all V1 subvectors are in place.
+    if (WidenedMask[i] < 4) {
+      if (WidenedMask[i] != i) {
+        IsInsert = false;
+        break;
+      }
+    } else {
+      // Make sure we only have a single V2 index and it's the lowest 128 bits.
+      if (V2Index >= 0 || WidenedMask[i] != 4) {
+        IsInsert = false;
+        break;
+      }
+      V2Index = i;
+    }
+  }
+  if (IsInsert && V2Index >= 0) {
+    MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
+    SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
+                                 DAG.getIntPtrConstant(0, DL));
+    return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL);
+  }
+
+  // Try to lower to vshuf64x2/vshuf32x4.
+  SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
+  unsigned PermMask = 0;
+  // Ensure elements came from the same Op.
+  for (int i = 0; i < 4; ++i) {
+    assert(WidenedMask[i] >= -1);
+    if (WidenedMask[i] < 0)
+      continue;
+
+    SDValue Op = WidenedMask[i] >= 4 ? V2 : V1;
+    unsigned OpIndex = i / 2;
     if (Ops[OpIndex].isUndef())
       Ops[OpIndex] = Op;
     else if (Ops[OpIndex] != Op)
       return SDValue();
-  }

-  // Form a 128-bit permutation.
-  // Convert the 64-bit shuffle mask selection values into 128-bit selection
-  // bits defined by a vshuf64x2 instruction's immediate control byte.
-  unsigned PermMask = 0, Imm = 0;
-  unsigned ControlBitsNum = WidenedMask.size() / 2;
-
-  for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
-    // Use first element in place of undef mask.
-    Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
-    PermMask |= (Imm % WidenedMask.size()) << (i * ControlBitsNum);
+    // Convert the 128-bit shuffle mask selection values into 128-bit selection
+    // bits defined by a vshuf64x2 instruction's immediate control byte.
+    PermMask |= (WidenedMask[i] % 4) << (i * 2);
   }

   return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
@@ -13051,10 +13085,10 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,

 static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) {
   int NumElements = Mask.size();
-  int NumV1Elements = 0, NumV2Elements = 0, NumSentinelElements = 0;
+  int NumV1Elements = 0, NumV2Elements = 0;
   for (int M : Mask)
     if (M < 0)
-      ++NumSentinelElements;
+      continue;
     else if (M < NumElements)
       ++NumV1Elements;
     else
@@ -18660,8 +18694,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
                                 Mask, PassThru, Subtarget, DAG);
     }
     case INTR_TYPE_3OP_IMM8_MASK:
-    case INTR_TYPE_3OP_MASK:
-    case INSERT_SUBVEC: {
+    case INTR_TYPE_3OP_MASK: {
       SDValue Src1 = Op.getOperand(1);
       SDValue Src2 = Op.getOperand(2);
       SDValue Src3 = Op.getOperand(3);
@@ -18670,13 +18703,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
       if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK)
         Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
-      else if (IntrData->Type == INSERT_SUBVEC) {
-        // imm should be adapted to ISD::INSERT_SUBVECTOR behavior
-        assert(isa<ConstantSDNode>(Src3) && "Expected a ConstantSDNode here!");
-        unsigned Imm = cast<ConstantSDNode>(Src3)->getZExtValue();
-        Imm *= Src2.getSimpleValueType().getVectorNumElements();
-        Src3 = DAG.getTargetConstant(Imm, dl, MVT::i32);
-      }

       // We specify 2 possible opcodes for intrinsics with rounding modes.
       // First, we check if the intrinsic may have non-default rounding mode,
@@ -28693,6 +28719,29 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
     return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1),
                                     Op.getOperand(2));
   }
+  case ISD::INSERT_SUBVECTOR: {
+    unsigned EltSize = EltVT.getSizeInBits();
+    if (EltSize != 32 && EltSize != 64)
+      return false;
+    MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
+    // Only change element size, not type.
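// Reader's note on the index arithmetic coming up: an INSERT_SUBVECTOR index
// counts elements of the participating types, so once the bitcast changes the
// element width the index must be rescaled by oldWidth/newWidth, i.e.
// Imm' = Imm * OpEltBits / EltBits. Illustrative values (not from the patch):
// inserting a v4i32 subvector at element 4 of v16i32 describes the same bits
// as inserting v2i64 at element 2 of v8i64 (4 * 32 / 64 == 2). The guard just
// below keeps integer and floating-point element types from being mixed while
// doing so: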
+    if (VT.isInteger() != OpEltVT.isInteger())
+      return false;
+    uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+    Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
+    SDValue Op0 = DAG.getBitcast(VT, Op.getOperand(0));
+    DCI.AddToWorklist(Op0.getNode());
+    // Op1 needs to be bitcasted to a smaller vector with the same element type.
+    SDValue Op1 = Op.getOperand(1);
+    MVT Op1VT = MVT::getVectorVT(EltVT,
+                                 Op1.getSimpleValueType().getSizeInBits() / EltSize);
+    Op1 = DAG.getBitcast(Op1VT, Op1);
+    DCI.AddToWorklist(Op1.getNode());
+    DCI.CombineTo(OrigOp.getNode(),
+                  DAG.getNode(Opcode, DL, VT, Op0, Op1,
+                              DAG.getConstant(Imm, DL, MVT::i8)));
+    return true;
+  }
   }

   return false;
@@ -31784,6 +31833,83 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }

+/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
+/// the codegen.
+/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
+static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
+                                          const X86Subtarget &Subtarget,
+                                          SDLoc &DL) {
+  assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode");
+  SDValue Src = N->getOperand(0);
+  unsigned Opcode = Src.getOpcode();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  EVT VT = N->getValueType(0);
+  EVT SrcVT = Src.getValueType();
+
+  auto IsRepeatedOpOrOneUseConstant = [](SDValue Op0, SDValue Op1) {
+    // TODO: Add extra cases where we can truncate both inputs for the
+    // cost of one (or none).
+    // e.g. TRUNC( BINOP( EXT( X ), EXT( Y ) ) ) --> BINOP( X, Y )
+    if (Op0 == Op1)
+      return true;
+
+    SDValue BC0 = peekThroughOneUseBitcasts(Op0);
+    SDValue BC1 = peekThroughOneUseBitcasts(Op1);
+    return ISD::isBuildVectorOfConstantSDNodes(BC0.getNode()) ||
+           ISD::isBuildVectorOfConstantSDNodes(BC1.getNode());
+  };
+
+  auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
+    SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
+    SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+    return DAG.getNode(Opcode, DL, VT, Trunc0, Trunc1);
+  };
+
+  // Don't combine if the operation has other uses.
+  if (!N->isOnlyUserOf(Src.getNode()))
+    return SDValue();
+
+  // Only support vector truncation for now.
+  // TODO: i64 scalar math would benefit as well.
+  if (!VT.isVector())
+    return SDValue();
+
+  // In most cases it's only worth pre-truncating if we're only facing the cost
+  // of one truncation.
+  // i.e. if one of the inputs will constant fold or the input is repeated.
+  switch (Opcode) {
+  case ISD::AND:
+  case ISD::XOR:
+  case ISD::OR: {
+    SDValue Op0 = Src.getOperand(0);
+    SDValue Op1 = Src.getOperand(1);
+    if (TLI.isOperationLegalOrPromote(Opcode, VT) &&
+        IsRepeatedOpOrOneUseConstant(Op0, Op1))
+      return TruncateArithmetic(Op0, Op1);
+    break;
+  }
+
+  case ISD::MUL:
+    // X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - it's
+    // better to truncate if we have the chance.
+    if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) &&
+        !TLI.isOperationLegal(Opcode, SrcVT))
+      return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
+    LLVM_FALLTHROUGH;
+  case ISD::ADD: {
+    SDValue Op0 = Src.getOperand(0);
+    SDValue Op1 = Src.getOperand(1);
+    if (TLI.isOperationLegal(Opcode, VT) &&
+        IsRepeatedOpOrOneUseConstant(Op0, Op1))
+      return TruncateArithmetic(Op0, Op1);
+    break;
+  }
+  }
+
+  return SDValue();
+}
+
 /// Truncate a group of v4i32 into v16i8/v8i16 using X86ISD::PACKUS.
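// How that works in scalar terms (reader's sketch, not patch code): PACKUS
// packs wider lanes into narrower ones with unsigned saturation, so a
// truncating pack is bit-exact only if each source lane is pre-masked to the
// destination width, which makes the saturation a no-op:

#include <cstdint>

static uint8_t packusLane(uint32_t x) {        // one lane of a v4i32 -> v16i8 path
  uint32_t masked = x & 0xFFu;                 // pre-mask to the low 8 bits
  return masked > 255 ? 255 : uint8_t(masked); // saturation can no longer fire
}

// The function below arranges the vector-level equivalent: mask the inputs,
// then apply X86ISD::PACKUS repeatedly.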
static SDValue combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG,
@@ -31970,6 +32096,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
   SDValue Src = N->getOperand(0);
   SDLoc DL(N);
 
+  // Attempt to pre-truncate inputs to arithmetic ops instead.
+  if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL))
+    return V;
+
   // Try to detect AVG pattern first.
   if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
     return Avg;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
index da7437ea0ccb..908053e1342d 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -650,33 +650,6 @@ multiclass vextract_for_size<int Opcode,
-
-  // Intrinsic call with masking.
-  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
-                              "x" # To.NumElts # "_" # From.Size)
-                From.RC:$src1, (iPTR imm:$idx), To.RC:$src0, To.MRC:$mask),
-            (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
-                                From.ZSuffix # "rrk")
-                To.RC:$src0,
-                (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
-                From.RC:$src1, imm:$idx)>;
-
-  // Intrinsic call with zero-masking.
-  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
-                              "x" # To.NumElts # "_" # From.Size)
-                From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, To.MRC:$mask),
-            (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
-                                From.ZSuffix # "rrkz")
-                (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
-                From.RC:$src1, imm:$idx)>;
-
-  // Intrinsic call without masking.
-  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
-                              "x" # To.NumElts # "_" # From.Size)
-                From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
-            (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
-                                From.ZSuffix # "rr")
-                From.RC:$src1, imm:$idx)>;
 }
 
 // Codegen pattern for the alternative types
@@ -6871,18 +6844,18 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
                                     VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
   }
   let isCodeGenOnly = 1 in {
-    defm Int_VUCOMISSZ  : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
-                                        load, "ucomiss">, PS, EVEX, VEX_LIG,
+    defm Int_VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
+                                            sse_load_f32, "ucomiss">, PS, EVEX, VEX_LIG,
                                         EVEX_CD8<32, CD8VT1>;
-    defm Int_VUCOMISDZ  : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
-                                        load, "ucomisd">, PD, EVEX,
+    defm Int_VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
+                                            sse_load_f64, "ucomisd">, PD, EVEX,
                                         VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
-    defm Int_VCOMISSZ  : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
-                                       load, "comiss">, PS, EVEX, VEX_LIG,
+    defm Int_VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
+                                           sse_load_f32, "comiss">, PS, EVEX, VEX_LIG,
                                        EVEX_CD8<32, CD8VT1>;
-    defm Int_VCOMISDZ  : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
-                                       load, "comisd">, PD, EVEX,
+    defm Int_VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
+                                           sse_load_f64, "comisd">, PD, EVEX,
                                        VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
   }
 }
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
index 9d6a89363044..4cd6ae563f03 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -2373,6 +2373,23 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
                   Sched<[WriteFAddLd, ReadAfterLd]>;
 }
 
+// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp
+multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
+                             ValueType vt, Operand memop,
+                             ComplexPattern mem_cpat, string OpcodeStr> {
+  def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+             [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
+          Sched<[WriteFAdd]>;
+  def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+             [(set EFLAGS, (OpNode (vt RC:$src1), mem_cpat:$src2))]>,
+          Sched<[WriteFAddLd, ReadAfterLd]>;
+}
+
 let Defs = [EFLAGS] in {
   defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss">, PS, VEX, VEX_LIG;
@@ -2386,15 +2403,15 @@ let Defs = [EFLAGS] in {
   }
 
   let isCodeGenOnly = 1 in {
-    defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
-                                      load, "ucomiss">, PS, VEX;
-    defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
-                                      load, "ucomisd">, PD, VEX;
+    defm Int_VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
+                                          sse_load_f32, "ucomiss">, PS, VEX;
+    defm Int_VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
+                                          sse_load_f64, "ucomisd">, PD, VEX;
 
-    defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
-                                     load, "comiss">, PS, VEX;
-    defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
-                                     load, "comisd">, PD, VEX;
+    defm Int_VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
+                                         sse_load_f32, "comiss">, PS, VEX;
+    defm Int_VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
+                                         sse_load_f64, "comisd">, PD, VEX;
   }
   defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
                                "ucomiss">, PS;
@@ -2409,15 +2426,15 @@ let Defs = [EFLAGS] in {
   }
 
   let isCodeGenOnly = 1 in {
-    defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
-                                     load, "ucomiss">, PS;
-    defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
-                                     load, "ucomisd">, PD;
+    defm Int_UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
+                                         sse_load_f32, "ucomiss">, PS;
+    defm Int_UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
+                                         sse_load_f64, "ucomisd">, PD;
 
-    defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
-                                    "comiss">, PS;
-    defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
-                                    "comisd">, PD;
+    defm Int_COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
+                                        sse_load_f32, "comiss">, PS;
+    defm Int_COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
+                                        sse_load_f64, "comisd">, PD;
   }
 } // Defs = [EFLAGS]
diff --git a/contrib/llvm/lib/Target/X86/X86InstrTablesInfo.h b/contrib/llvm/lib/Target/X86/X86InstrTablesInfo.h
index 5d2af829028a..415a891bfd97 100755
--- a/contrib/llvm/lib/Target/X86/X86InstrTablesInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrTablesInfo.h
@@ -1,4 +1,4 @@
-//===-- X86AVX512Info.h - X86 Instruction Tables Information ----*- C++ -*-===//
+//===-- X86InstrTablesInfo.h - X86 Instruction Tables -----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -25,8 +25,7 @@ struct X86EvexToVexCompressTableEntry {
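// [Editorial aside, not part of this patch] The table below pairs each EVEX
// opcode with its VEX-128 replacement. A minimal sketch of how such a table
// can be consumed; the entry fields and the linear scan are assumptions
// modeled on X86EvexToVexCompressTableEntry, not the real pass.
#include <cstdint>

struct CompressEntry {
  uint16_t EvexOpcode; // hypothetical field names for the sketch
  uint16_t VexOpcode;
};

static const CompressEntry CompressTable[] = {
    {0x1001, 0x0001}, {0x1002, 0x0002}, // placeholder opcode pairs
};

// Returns the VEX-128 replacement for an EVEX opcode, or -1 if there is none.
inline int lookupVexOpcode(uint16_t EvexOp) {
  for (const auto &E : CompressTable)
    if (E.EvexOpcode == EvexOp)
      return E.VexOpcode;
  return -1;
}

// X86 EVEX encoded instructions that have a VEX 128 encoding
// (table format: <EVEX opcode, VEX-128 opcode>).
-static const X86EvexToVexCompressTableEntry
-  X86EvexToVex128CompressTable[] = {
+static const X86EvexToVexCompressTableEntry X86EvexToVex128CompressTable[] = {
   // EVEX scalar with corresponding VEX.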
{ X86::Int_VCOMISDZrm , X86::Int_VCOMISDrm }, { X86::Int_VCOMISDZrr , X86::Int_VCOMISDrr }, @@ -250,20 +249,20 @@ static const X86EvexToVexCompressTableEntry { X86::VUCOMISDZrr , X86::VUCOMISDrr }, { X86::VUCOMISSZrm , X86::VUCOMISSrm }, { X86::VUCOMISSZrr , X86::VUCOMISSrr }, - + { X86::VMOV64toPQIZrr , X86::VMOV64toPQIrr }, { X86::VMOV64toSDZrr , X86::VMOV64toSDrr }, { X86::VMOVDI2PDIZrm , X86::VMOVDI2PDIrm }, { X86::VMOVDI2PDIZrr , X86::VMOVDI2PDIrr }, { X86::VMOVLHPSZrr , X86::VMOVLHPSrr }, - { X86::VMOVHLPSZrr , X86::VMOVHLPSrr }, + { X86::VMOVHLPSZrr , X86::VMOVHLPSrr }, { X86::VMOVPDI2DIZmr , X86::VMOVPDI2DImr }, { X86::VMOVPDI2DIZrr , X86::VMOVPDI2DIrr }, { X86::VMOVPQI2QIZmr , X86::VMOVPQI2QImr }, { X86::VMOVPQIto64Zrr , X86::VMOVPQIto64rr }, { X86::VMOVQI2PQIZrm , X86::VMOVQI2PQIrm }, { X86::VMOVZPQILo2PQIZrr , X86::VMOVZPQILo2PQIrr }, - + { X86::VPEXTRBZmr , X86::VPEXTRBmr }, { X86::VPEXTRBZrr , X86::VPEXTRBrr }, { X86::VPEXTRDZmr , X86::VPEXTRDmr }, @@ -272,7 +271,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPEXTRQZrr , X86::VPEXTRQrr }, { X86::VPEXTRWZmr , X86::VPEXTRWmr }, { X86::VPEXTRWZrr , X86::VPEXTRWri }, - + { X86::VPINSRBZrm , X86::VPINSRBrm }, { X86::VPINSRBZrr , X86::VPINSRBrr }, { X86::VPINSRDZrm , X86::VPINSRDrm }, @@ -294,7 +293,7 @@ static const X86EvexToVexCompressTableEntry { X86::VANDPDZ128rm , X86::VANDPDrm }, { X86::VANDPDZ128rr , X86::VANDPDrr }, { X86::VANDPSZ128rm , X86::VANDPSrm }, - { X86::VANDPSZ128rr , X86::VANDPSrr }, + { X86::VANDPSZ128rr , X86::VANDPSrr }, { X86::VBROADCASTSSZ128m , X86::VBROADCASTSSrm }, { X86::VBROADCASTSSZ128r , X86::VBROADCASTSSrr }, { X86::VBROADCASTSSZ128r_s , X86::VBROADCASTSSrr }, @@ -414,8 +413,8 @@ static const X86EvexToVexCompressTableEntry { X86::VMOVAPDZ128rm , X86::VMOVAPDrm }, { X86::VMOVAPDZ128rr , X86::VMOVAPDrr }, { X86::VMOVAPDZ128rr_REV , X86::VMOVAPDrr_REV }, - { X86::VMOVAPSZ128mr , X86::VMOVAPSmr }, - { X86::VMOVAPSZ128rm , X86::VMOVAPSrm }, + { X86::VMOVAPSZ128mr , X86::VMOVAPSmr }, + { X86::VMOVAPSZ128rm , X86::VMOVAPSrm }, { X86::VMOVAPSZ128rr , X86::VMOVAPSrr }, { X86::VMOVAPSZ128rr_REV , X86::VMOVAPSrr_REV }, { X86::VMOVDDUPZ128rm , X86::VMOVDDUPrm }, @@ -464,8 +463,8 @@ static const X86EvexToVexCompressTableEntry { X86::VMOVUPDZ128rm , X86::VMOVUPDrm }, { X86::VMOVUPDZ128rr , X86::VMOVUPDrr }, { X86::VMOVUPDZ128rr_REV , X86::VMOVUPDrr_REV }, - { X86::VMOVUPSZ128mr , X86::VMOVUPSmr }, - { X86::VMOVUPSZ128rm , X86::VMOVUPSrm }, + { X86::VMOVUPSZ128mr , X86::VMOVUPSmr }, + { X86::VMOVUPSZ128rm , X86::VMOVUPSrm }, { X86::VMOVUPSZ128rr , X86::VMOVUPSrr }, { X86::VMOVUPSZ128rr_REV , X86::VMOVUPSrr_REV }, { X86::VMULPDZ128rm , X86::VMULPDrm }, @@ -520,9 +519,9 @@ static const X86EvexToVexCompressTableEntry { X86::VPBROADCASTBZ128r , X86::VPBROADCASTBrr }, { X86::VPBROADCASTDZ128m , X86::VPBROADCASTDrm }, { X86::VPBROADCASTDZ128r , X86::VPBROADCASTDrr }, - { X86::VPBROADCASTQZ128m , X86::VPBROADCASTQrm }, - { X86::VPBROADCASTQZ128r , X86::VPBROADCASTQrr }, - { X86::VPBROADCASTWZ128m , X86::VPBROADCASTWrm }, + { X86::VPBROADCASTQZ128m , X86::VPBROADCASTQrm }, + { X86::VPBROADCASTQZ128r , X86::VPBROADCASTQrr }, + { X86::VPBROADCASTWZ128m , X86::VPBROADCASTWrm }, { X86::VPBROADCASTWZ128r , X86::VPBROADCASTWrr }, { X86::VPERMILPDZ128mi , X86::VPERMILPDmi }, { X86::VPERMILPDZ128ri , X86::VPERMILPDri }, @@ -583,7 +582,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPMOVZXWDZ128rm , X86::VPMOVZXWDrm }, { X86::VPMOVZXWDZ128rr , X86::VPMOVZXWDrr }, { X86::VPMOVZXWQZ128rm , X86::VPMOVZXWQrm }, - { 
X86::VPMOVZXWQZ128rr , X86::VPMOVZXWQrr }, + { X86::VPMOVZXWQZ128rr , X86::VPMOVZXWQrr }, { X86::VPMULDQZ128rm , X86::VPMULDQrm }, { X86::VPMULDQZ128rr , X86::VPMULDQrr }, { X86::VPMULHRSWZ128rm , X86::VPMULHRSWrm }, @@ -612,10 +611,10 @@ static const X86EvexToVexCompressTableEntry { X86::VPSHUFHWZ128ri , X86::VPSHUFHWri }, { X86::VPSHUFLWZ128mi , X86::VPSHUFLWmi }, { X86::VPSHUFLWZ128ri , X86::VPSHUFLWri }, - { X86::VPSLLDQZ128rr , X86::VPSLLDQri }, + { X86::VPSLLDQZ128rr , X86::VPSLLDQri }, { X86::VPSLLDZ128ri , X86::VPSLLDri }, { X86::VPSLLDZ128rm , X86::VPSLLDrm }, - { X86::VPSLLDZ128rr , X86::VPSLLDrr }, + { X86::VPSLLDZ128rr , X86::VPSLLDrr }, { X86::VPSLLQZ128ri , X86::VPSLLQri }, { X86::VPSLLQZ128rm , X86::VPSLLQrm }, { X86::VPSLLQZ128rr , X86::VPSLLQrr }, @@ -713,8 +712,7 @@ static const X86EvexToVexCompressTableEntry // X86 EVEX encoded instructions that have a VEX 256 encoding // (table format: ). - static const X86EvexToVexCompressTableEntry - X86EvexToVex256CompressTable[] = { + static const X86EvexToVexCompressTableEntry X86EvexToVex256CompressTable[] = { { X86::VADDPDZ256rm , X86::VADDPDYrm }, { X86::VADDPDZ256rr , X86::VADDPDYrr }, { X86::VADDPSZ256rm , X86::VADDPSYrm }, @@ -727,11 +725,11 @@ static const X86EvexToVexCompressTableEntry { X86::VANDPDZ256rr , X86::VANDPDYrr }, { X86::VANDPSZ256rm , X86::VANDPSYrm }, { X86::VANDPSZ256rr , X86::VANDPSYrr }, - { X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm }, - { X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr }, - { X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr }, + { X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm }, + { X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr }, + { X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr }, { X86::VBROADCASTSSZ256m , X86::VBROADCASTSSYrm }, - { X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr }, + { X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr }, { X86::VBROADCASTSSZ256r_s , X86::VBROADCASTSSYrr }, { X86::VCVTDQ2PDZ256rm , X86::VCVTDQ2PDYrm }, { X86::VCVTDQ2PDZ256rr , X86::VCVTDQ2PDYrr }, @@ -757,6 +755,14 @@ static const X86EvexToVexCompressTableEntry { X86::VDIVPDZ256rr , X86::VDIVPDYrr }, { X86::VDIVPSZ256rm , X86::VDIVPSYrm }, { X86::VDIVPSZ256rr , X86::VDIVPSYrr }, + { X86::VEXTRACTF32x4Z256mr , X86::VEXTRACTF128mr }, + { X86::VEXTRACTF64x2Z256mr , X86::VEXTRACTF128mr }, + { X86::VEXTRACTF32x4Z256rr , X86::VEXTRACTF128rr }, + { X86::VEXTRACTF64x2Z256rr , X86::VEXTRACTF128rr }, + { X86::VEXTRACTI32x4Z256mr , X86::VEXTRACTI128mr }, + { X86::VEXTRACTI64x2Z256mr , X86::VEXTRACTI128mr }, + { X86::VEXTRACTI32x4Z256rr , X86::VEXTRACTI128rr }, + { X86::VEXTRACTI64x2Z256rr , X86::VEXTRACTI128rr }, { X86::VFMADD132PDZ256m , X86::VFMADD132PDYm }, { X86::VFMADD132PDZ256r , X86::VFMADD132PDYr }, { X86::VFMADD132PSZ256m , X86::VFMADD132PSYm }, @@ -829,6 +835,14 @@ static const X86EvexToVexCompressTableEntry { X86::VFNMSUB231PDZ256r , X86::VFNMSUB231PDYr }, { X86::VFNMSUB231PSZ256m , X86::VFNMSUB231PSYm }, { X86::VFNMSUB231PSZ256r , X86::VFNMSUB231PSYr }, + { X86::VINSERTF32x4Z256rm , X86::VINSERTF128rm }, + { X86::VINSERTF64x2Z256rm , X86::VINSERTF128rm }, + { X86::VINSERTF32x4Z256rr , X86::VINSERTF128rr }, + { X86::VINSERTF64x2Z256rr , X86::VINSERTF128rr }, + { X86::VINSERTI32x4Z256rm , X86::VINSERTI128rm }, + { X86::VINSERTI64x2Z256rm , X86::VINSERTI128rm }, + { X86::VINSERTI32x4Z256rr , X86::VINSERTI128rr }, + { X86::VINSERTI64x2Z256rr , X86::VINSERTI128rr }, { X86::VMAXCPDZ256rm , X86::VMAXCPDYrm }, { X86::VMAXCPDZ256rr , X86::VMAXCPDYrr }, { X86::VMAXCPSZ256rm , X86::VMAXCPSYrm }, @@ -849,8 +863,8 @@ 
static const X86EvexToVexCompressTableEntry { X86::VMOVAPDZ256rm , X86::VMOVAPDYrm }, { X86::VMOVAPDZ256rr , X86::VMOVAPDYrr }, { X86::VMOVAPDZ256rr_REV , X86::VMOVAPDYrr_REV }, - { X86::VMOVAPSZ256mr , X86::VMOVAPSYmr }, - { X86::VMOVAPSZ256rm , X86::VMOVAPSYrm }, + { X86::VMOVAPSZ256mr , X86::VMOVAPSYmr }, + { X86::VMOVAPSZ256rm , X86::VMOVAPSYrm }, { X86::VMOVAPSZ256rr , X86::VMOVAPSYrr }, { X86::VMOVAPSZ256rr_REV , X86::VMOVAPSYrr_REV }, { X86::VMOVDDUPZ256rm , X86::VMOVDDUPYrm }, @@ -943,14 +957,14 @@ static const X86EvexToVexCompressTableEntry { X86::VPAVGBZ256rr , X86::VPAVGBYrr }, { X86::VPAVGWZ256rm , X86::VPAVGWYrm }, { X86::VPAVGWZ256rr , X86::VPAVGWYrr }, - { X86::VPBROADCASTBZ256m , X86::VPBROADCASTBYrm }, - { X86::VPBROADCASTBZ256r , X86::VPBROADCASTBYrr }, - { X86::VPBROADCASTDZ256m , X86::VPBROADCASTDYrm }, - { X86::VPBROADCASTDZ256r , X86::VPBROADCASTDYrr }, - { X86::VPBROADCASTQZ256m , X86::VPBROADCASTQYrm }, - { X86::VPBROADCASTQZ256r , X86::VPBROADCASTQYrr }, - { X86::VPBROADCASTWZ256m , X86::VPBROADCASTWYrm }, - { X86::VPBROADCASTWZ256r , X86::VPBROADCASTWYrr }, + { X86::VPBROADCASTBZ256m , X86::VPBROADCASTBYrm }, + { X86::VPBROADCASTBZ256r , X86::VPBROADCASTBYrr }, + { X86::VPBROADCASTDZ256m , X86::VPBROADCASTDYrm }, + { X86::VPBROADCASTDZ256r , X86::VPBROADCASTDYrr }, + { X86::VPBROADCASTQZ256m , X86::VPBROADCASTQYrm }, + { X86::VPBROADCASTQZ256r , X86::VPBROADCASTQYrr }, + { X86::VPBROADCASTWZ256m , X86::VPBROADCASTWYrm }, + { X86::VPBROADCASTWZ256r , X86::VPBROADCASTWYrr }, { X86::VPERMDZ256rm , X86::VPERMDYrm }, { X86::VPERMDZ256rr , X86::VPERMDYrr }, { X86::VPERMILPDZ256mi , X86::VPERMILPDYmi }, @@ -1050,7 +1064,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPSLLDQZ256rr , X86::VPSLLDQYri }, { X86::VPSLLDZ256ri , X86::VPSLLDYri }, { X86::VPSLLDZ256rm , X86::VPSLLDYrm }, - { X86::VPSLLDZ256rr , X86::VPSLLDYrr }, + { X86::VPSLLDZ256rr , X86::VPSLLDYrr }, { X86::VPSLLQZ256ri , X86::VPSLLQYri }, { X86::VPSLLQZ256rm , X86::VPSLLQYrm }, { X86::VPSLLQZ256rr , X86::VPSLLQYrr }, @@ -1060,7 +1074,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPSLLVQZ256rr , X86::VPSLLVQYrr }, { X86::VPSLLWZ256ri , X86::VPSLLWYri }, { X86::VPSLLWZ256rm , X86::VPSLLWYrm }, - { X86::VPSLLWZ256rr , X86::VPSLLWYrr }, + { X86::VPSLLWZ256rr , X86::VPSLLWYrr }, { X86::VPSRADZ256ri , X86::VPSRADYri }, { X86::VPSRADZ256rm , X86::VPSRADYrm }, { X86::VPSRADZ256rr , X86::VPSRADYrr }, @@ -1072,7 +1086,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPSRLDQZ256rr , X86::VPSRLDQYri }, { X86::VPSRLDZ256ri , X86::VPSRLDYri }, { X86::VPSRLDZ256rm , X86::VPSRLDYrm }, - { X86::VPSRLDZ256rr , X86::VPSRLDYrr }, + { X86::VPSRLDZ256rr , X86::VPSRLDYrr }, { X86::VPSRLQZ256ri , X86::VPSRLQYri }, { X86::VPSRLQZ256rm , X86::VPSRLQYrm }, { X86::VPSRLQZ256rr , X86::VPSRLQYrr }, @@ -1145,4 +1159,4 @@ static const X86EvexToVexCompressTableEntry { X86::VXORPSZ256rr , X86::VXORPSYrr }, }; -#endif \ No newline at end of file +#endif diff --git a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h index df47b4ad583d..63a02af02faa 100644 --- a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -34,7 +34,7 @@ enum IntrinsicType : uint16_t { INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, BRCST32x2_TO_VEC, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, - EXPAND_FROM_MEM, INSERT_SUBVEC, + EXPAND_FROM_MEM, 
TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS, FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK }; @@ -795,30 +795,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VGETMANTS, 0), X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM, X86ISD::VGETMANTS, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf32x4_256, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf32x4_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf32x8_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf64x2_256, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf64x2_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf64x4_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti32x4_256, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti32x4_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti32x8_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti64x2_256, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti64x2_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti64x4_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK, ISD::CTLZ, 0), X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256, INTR_TYPE_1OP_MASK, diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp index 2f69df064e7f..a38a4b30b77d 100644 --- a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -1115,56 +1115,6 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLo OutStreamer->EmitInstruction(TC, getSubtargetInfo()); } -void X86AsmPrinter::EmitXRayTable() { - if (Sleds.empty()) - return; - - auto PrevSection = OutStreamer->getCurrentSectionOnly(); - auto Fn = MF->getFunction(); - MCSection *Section = nullptr; - if (Subtarget->isTargetELF()) { - if (Fn->hasComdat()) { - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, - Fn->getComdat()->getName()); - } else { - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC); - } - } else if (Subtarget->isTargetMachO()) { - Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, - SectionKind::getReadOnlyWithRel()); - } else { - llvm_unreachable("Unsupported target"); - } - - // Before we switch over, we force a reference to a label inside the - // xray_instr_map section. Since EmitXRayTable() is always called just - // before the function's end, we assume that this is happening after the - // last return instruction. - // - // We then align the reference to 16 byte boundaries, which we determined - // experimentally to be beneficial to avoid causing decoder stalls. 
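// [Editorial aside, not part of this patch] Each sled record written by the
// code removed below is 32 bytes: an 8-byte sled address, an 8-byte function
// address, one byte of sled kind, one byte for the always-instrument flag,
// then 14 zero bytes of padding. A sketch of that layout, with an invented
// struct name; the field order is inferred from the emission calls:
#include <cstdint>

struct XRaySledRecord {
  uint64_t SledAddress;     // EmitSymbolValue(Sled.Sled, 8)
  uint64_t FunctionAddress; // EmitSymbolValue(CurrentFnSym, 8)
  uint8_t Kind;             // one byte from Sled.Kind
  uint8_t AlwaysInstrument; // one byte from Sled.AlwaysInstrument
  uint8_t Padding[14];      // EmitZeros(14) pads the record to 32 bytes
};
static_assert(sizeof(XRaySledRecord) == 32, "32-byte packed record");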
- MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true); - OutStreamer->EmitCodeAlignment(16); - OutStreamer->EmitSymbolValue(Tmp, 8, false); - OutStreamer->SwitchSection(Section); - OutStreamer->EmitLabel(Tmp); - for (const auto &Sled : Sleds) { - OutStreamer->EmitSymbolValue(Sled.Sled, 8); - OutStreamer->EmitSymbolValue(CurrentFnSym, 8); - auto Kind = static_cast(Sled.Kind); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Kind), 1)); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Sled.AlwaysInstrument), 1)); - OutStreamer->EmitZeros(14); - } - OutStreamer->SwitchSection(PrevSection); - - Sleds.clear(); -} - // Returns instruction preceding MBBI in MachineFunction. // If MBBI is the first instruction of the first basic block, returns null. static MachineBasicBlock::const_iterator diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 2b0e672d56f2..d7792e296a58 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -598,197 +598,135 @@ int X86TTIImpl::getArithmeticInstrCost( int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { - - if (Kind == TTI::SK_Reverse) { - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - - static const CostTblEntry AVX512VBMIShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v64i8, 1 }, // vpermb - { ISD::VECTOR_SHUFFLE, MVT::v32i8, 1 } // vpermb - }; - - if (ST->hasVBMI()) - if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry AVX512BWShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v32i16, 1 }, // vpermw - { ISD::VECTOR_SHUFFLE, MVT::v16i16, 1 }, // vpermw - { ISD::VECTOR_SHUFFLE, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128 - // + 2*pshufb + vinserti64x4 - }; - - if (ST->hasBWI()) - if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry AVX512ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v8f64, 1 }, // vpermpd - { ISD::VECTOR_SHUFFLE, MVT::v16f32, 1 }, // vpermps - { ISD::VECTOR_SHUFFLE, MVT::v8i64, 1 }, // vpermq - { ISD::VECTOR_SHUFFLE, MVT::v16i32, 1 }, // vpermd - }; - - if (ST->hasAVX512()) - if (const auto *Entry = - CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry AVX2ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v4f64, 1 }, // vpermpd - { ISD::VECTOR_SHUFFLE, MVT::v8f32, 1 }, // vpermps - { ISD::VECTOR_SHUFFLE, MVT::v4i64, 1 }, // vpermq - { ISD::VECTOR_SHUFFLE, MVT::v8i32, 1 }, // vpermd - { ISD::VECTOR_SHUFFLE, MVT::v16i16, 2 }, // vperm2i128 + pshufb - { ISD::VECTOR_SHUFFLE, MVT::v32i8, 2 } // vperm2i128 + pshufb - }; - - if (ST->hasAVX2()) - if (const auto *Entry = - CostTableLookup(AVX2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry AVX1ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd - { ISD::VECTOR_SHUFFLE, MVT::v8f32, 2 }, // vperm2f128 + vpermilps - { ISD::VECTOR_SHUFFLE, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd - { ISD::VECTOR_SHUFFLE, MVT::v8i32, 2 }, // vperm2f128 + vpermilps - { ISD::VECTOR_SHUFFLE, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb - // + vinsertf128 - { ISD::VECTOR_SHUFFLE, MVT::v32i8, 4 } // vextractf128 + 2*pshufb - // + 
vinsertf128 - }; - - if (ST->hasAVX()) - if (const auto *Entry = - CostTableLookup(AVX1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry SSSE3ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v8i16, 1 }, // pshufb - { ISD::VECTOR_SHUFFLE, MVT::v16i8, 1 } // pshufb - }; - - if (ST->hasSSSE3()) - if (const auto *Entry = - CostTableLookup(SSSE3ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry SSE2ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 }, // shufpd - { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 }, // pshufd - { ISD::VECTOR_SHUFFLE, MVT::v4i32, 1 }, // pshufd - { ISD::VECTOR_SHUFFLE, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd - { ISD::VECTOR_SHUFFLE, MVT::v16i8, 9 } // 2*pshuflw + 2*pshufhw - // + 2*pshufd + 2*unpck + packus - }; - - if (ST->hasSSE2()) - if (const auto *Entry = - CostTableLookup(SSE2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry SSE1ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v4f32, 1 }, // shufps - }; - - if (ST->hasSSE1()) - if (const auto *Entry = - CostTableLookup(SSE1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - } else if (Kind == TTI::SK_Alternate) { + if (Kind == TTI::SK_Reverse || Kind == TTI::SK_Alternate) { // 64-bit packed float vectors (v2f32) are widened to type v4f32. // 64-bit packed integer vectors (v2i32) are promoted to type v2i64. std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - // The backend knows how to generate a single VEX.256 version of - // instruction VPBLENDW if the target supports AVX2. - if (ST->hasAVX2() && LT.second == MVT::v16i16) - return LT.first; + static const CostTblEntry AVX512VBMIShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb + { TTI::SK_Reverse, MVT::v32i8, 1 } // vpermb + }; - static const CostTblEntry AVXAltShuffleTbl[] = { - {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd - {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd + if (ST->hasVBMI()) + if (const auto *Entry = + CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; - {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vblendps - {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vblendps + static const CostTblEntry AVX512BWShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw + { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw + { TTI::SK_Reverse, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128 + // + 2*pshufb + vinserti64x4 + }; - // This shuffle is custom lowered into a sequence of: - // 2x vextractf128 , 2x vpblendw , 1x vinsertf128 - {ISD::VECTOR_SHUFFLE, MVT::v16i16, 5}, + if (ST->hasBWI()) + if (const auto *Entry = + CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; - // This shuffle is custom lowered into a long sequence of: - // 2x vextractf128 , 4x vpshufb , 2x vpor , 1x vinsertf128 - {ISD::VECTOR_SHUFFLE, MVT::v32i8, 9} + static const CostTblEntry AVX512ShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd + { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps + { TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq + { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd + }; + + if (ST->hasAVX512()) + if (const auto *Entry = + CostTableLookup(AVX512ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; + + static const CostTblEntry AVX2ShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd + { TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps + { 
TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq + { TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd + { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb + { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb + + { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw + { TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb + }; + + if (ST->hasAVX2()) + if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; + + static const CostTblEntry AVX1ShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd + { TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps + { TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd + { TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps + { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb + // + vinsertf128 + { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb + // + vinsertf128 + + { TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd + { TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd + { TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps + { TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps + { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor + { TTI::SK_Alternate, MVT::v32i8, 3 } // vpand + vpandn + vpor }; if (ST->hasAVX()) - if (const auto *Entry = CostTableLookup(AVXAltShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) + if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; - static const CostTblEntry SSE41AltShuffleTbl[] = { - // These are lowered into movsd. - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, - - // packed float vectors with four elements are lowered into BLENDI dag - // nodes. A v4i32/v4f32 BLENDI generates a single 'blendps'/'blendpd'. - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, - - // This shuffle generates a single pshufw. - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, - - // There is no instruction that matches a v16i8 alternate shuffle. - // The backend will expand it into the sequence 'pshufb + pshufb + or'. - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} + static const CostTblEntry SSE41ShuffleTbl[] = { + { TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw + { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd + { TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw + { TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps + { TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw + { TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb }; if (ST->hasSSE41()) - if (const auto *Entry = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE, - LT.second)) + if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; - static const CostTblEntry SSSE3AltShuffleTbl[] = { - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd + static const CostTblEntry SSSE3ShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb + { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb - // SSE3 doesn't have 'blendps'. 
The following shuffles are expanded into - // the sequence 'shufps + pshufd' - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, - - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or + { TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por + { TTI::SK_Alternate, MVT::v16i8, 3 } // pshufb + pshufb + por }; if (ST->hasSSSE3()) - if (const auto *Entry = CostTableLookup(SSSE3AltShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) + if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; - static const CostTblEntry SSEAltShuffleTbl[] = { - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd + static const CostTblEntry SSE2ShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd + { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd + { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd + { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd + { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw + // + 2*pshufd + 2*unpck + packus - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd - - // This is expanded into a long sequence of four extract + four insert. - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw. - - // 8 x (pinsrw + pextrw + and + movb + movzb + or) - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 48} + { TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd + { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd + { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps + { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por + { TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por }; - // Fall-back (SSE3 and SSE2). - if (const auto *Entry = CostTableLookup(SSEAltShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; + if (ST->hasSSE2()) + if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; + + static const CostTblEntry SSE1ShuffleTbl[] = { + { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps + { TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps + }; + + if (ST->hasSSE1()) + if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; } else if (Kind == TTI::SK_PermuteTwoSrc) { // We assume that source and destination have the same vector type. 
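// [Editorial aside, not part of this patch] Throughout the rewritten
// getShuffleCost above, the returned estimate is LT.first * Entry->Cost:
// the number of legal-type pieces the vector legalizes into, times the
// per-piece cost from the first table whose ISA predicate matches. A toy
// check of that arithmetic with invented numbers:
#include <cassert>

// E.g. a hypothetical v32i16 reverse on AVX2 legalizes as two v16i16 halves
// (LT.first == 2), each costing 2 per the AVX2ShuffleTbl entry
// (vperm2i128 + pshufb), for an estimate of 4.
inline int shuffleCostEstimate(int NumLegalPieces, int CostPerPiece) {
  return NumLegalPieces * CostPerPiece; // mirrors LT.first * Entry->Cost
}

int main() {
  assert(shuffleCostEstimate(2, 2) == 4);
  return 0;
}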
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 3bbc70ab21c6..55151c13b430 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1057,6 +1057,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       // add(zext(xor i16 X, -32768), -32768) --> sext X
       return CastInst::Create(Instruction::SExt, X, LHS->getType());
     }
+
+    if (Val->isNegative() &&
+        match(LHS, m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C)))) &&
+        Val->sge(-C->sext(Val->getBitWidth()))) {
+      // (add (zext (add nuw X, C)), Val) -> (zext (add nuw X, C+Val))
+      return CastInst::Create(
+          Instruction::ZExt,
+          Builder->CreateNUWAdd(
+              X, Constant::getIntegerValue(X->getType(),
+                                           *C + Val->trunc(C->getBitWidth()))),
+          I.getType());
+    }
   }
 
   // FIXME: Use the match above instead of dyn_cast to allow these transforms
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 92369bd70b13..f863d192fc2f 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1581,6 +1581,62 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       return replaceInstUsesWith(*II, V);
     break;
   }
+  case Intrinsic::fma:
+  case Intrinsic::fmuladd: {
+    Value *Src0 = II->getArgOperand(0);
+    Value *Src1 = II->getArgOperand(1);
+
+    // Canonicalize constants into the RHS.
+    if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
+      II->setArgOperand(0, Src1);
+      II->setArgOperand(1, Src0);
+      std::swap(Src0, Src1);
+    }
+
+    Value *LHS = nullptr;
+    Value *RHS = nullptr;
+
+    // fma fneg(x), fneg(y), z -> fma x, y, z
+    if (match(Src0, m_FNeg(m_Value(LHS))) &&
+        match(Src1, m_FNeg(m_Value(RHS)))) {
+      CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
+                                              {LHS, RHS, II->getArgOperand(2)});
+      NewCall->takeName(II);
+      NewCall->copyFastMathFlags(II);
+      return replaceInstUsesWith(*II, NewCall);
+    }
+
+    // fma fabs(x), fabs(x), z -> fma x, x, z
+    if (match(Src0, m_Intrinsic<Intrinsic::fabs>(m_Value(LHS))) &&
+        match(Src1, m_Intrinsic<Intrinsic::fabs>(m_Value(RHS))) && LHS == RHS) {
+      CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
+                                              {LHS, LHS, II->getArgOperand(2)});
+      NewCall->takeName(II);
+      NewCall->copyFastMathFlags(II);
+      return replaceInstUsesWith(*II, NewCall);
+    }
+
+    // fma x, 1, z -> fadd x, z
+    if (match(Src1, m_FPOne())) {
+      Instruction *RI = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2));
+      RI->copyFastMathFlags(II);
+      return RI;
+    }
+
+    break;
+  }
+  case Intrinsic::fabs: {
+    Value *Cond;
+    Constant *LHS, *RHS;
+    if (match(II->getArgOperand(0),
+              m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
+      CallInst *Call0 = Builder->CreateCall(II->getCalledFunction(), {LHS});
+      CallInst *Call1 = Builder->CreateCall(II->getCalledFunction(), {RHS});
+      return SelectInst::Create(Cond, Call0, Call1);
+    }
+
+    break;
+  }
   case Intrinsic::ppc_altivec_lvx:
   case Intrinsic::ppc_altivec_lvxl:
     // Turn PPC lvx -> load if the pointer is known aligned.
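// [Editorial aside, not part of this patch] The three fma folds above rest on
// exact floating-point identities: the two sign flips cancel in the product,
// |x| * |x| equals x * x, and multiplying by one leaves a single rounded
// addition. A standalone check using the C library's fma:
#include <cassert>
#include <cmath>

int main() {
  double x = 1.5, y = -2.25, z = 0.125;
  // fma(-x, -y, z) == fma(x, y, z)
  assert(std::fma(-x, -y, z) == std::fma(x, y, z));
  // fma(|x|, |x|, z) == fma(x, x, z)
  assert(std::fma(std::fabs(x), std::fabs(x), z) == std::fma(x, x, z));
  // fma(x, 1.0, z) == x + z: both sides round x + z exactly once.
  assert(std::fma(x, 1.0, z) == x + z);
  return 0;
}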
@@ -2669,24 +2725,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // assume( (load addr) != null ) -> add 'nonnull' metadata to load // (if assume is valid at the load) - if (ICmpInst* ICmp = dyn_cast(IIOperand)) { - Value *LHS = ICmp->getOperand(0); - Value *RHS = ICmp->getOperand(1); - if (ICmpInst::ICMP_NE == ICmp->getPredicate() && - isa(LHS) && - isa(RHS) && - RHS->getType()->isPointerTy() && - cast(RHS)->isNullValue()) { - LoadInst* LI = cast(LHS); - if (isValidAssumeForContext(II, LI, &DT)) { - MDNode *MD = MDNode::get(II->getContext(), None); - LI->setMetadata(LLVMContext::MD_nonnull, MD); - return eraseInstFromFunction(*II); - } - } + CmpInst::Predicate Pred; + Instruction *LHS; + if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) && + Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load && + LHS->getType()->isPointerTy() && + isValidAssumeForContext(II, LHS, &DT)) { + MDNode *MD = MDNode::get(II->getContext(), None); + LHS->setMetadata(LLVMContext::MD_nonnull, MD); + return eraseInstFromFunction(*II); + // TODO: apply nonnull return attributes to calls and invokes // TODO: apply range metadata for range check patterns? } + // If there is a dominating assume with the same condition as this one, // then this one is redundant, and should be removed. APInt KnownZero(1, 0), KnownOne(1, 0); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 5276bee4e0a2..388c5e4e7fa4 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -850,20 +850,10 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // separated by a few arithmetic operations. BasicBlock::iterator BBI(LI); bool IsLoadCSE = false; - if (Value *AvailableVal = - FindAvailableLoadedValue(&LI, LI.getParent(), BBI, - DefMaxInstsToScan, AA, &IsLoadCSE)) { - if (IsLoadCSE) { - LoadInst *NLI = cast(AvailableVal); - unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, LLVMContext::MD_range, - LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull, - LLVMContext::MD_invariant_group, LLVMContext::MD_align, - LLVMContext::MD_dereferenceable, - LLVMContext::MD_dereferenceable_or_null}; - combineMetadata(NLI, &LI, KnownIDs); - }; + if (Value *AvailableVal = FindAvailableLoadedValue( + &LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) { + if (IsLoadCSE) + combineMetadataForCSE(cast(AvailableVal), &LI); return replaceInstUsesWith( LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(), diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index bc38c4aca348..5ad2a1c0e3e6 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -731,6 +731,25 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { if (ConstantInt *Op1C = dyn_cast(I.getOperand(1))) { unsigned ShAmt = Op1C->getZExtValue(); + // Turn: + // %zext = zext i32 %V to i64 + // %res = shl i64 %V, 8 + // + // Into: + // %shl = shl i32 %V, 8 + // %res = zext i32 %shl to i64 + // + // This is only valid if %V would have zeros shifted out. 
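// [Editorial aside, not part of this patch] A concrete instance of the guard:
// with %V = 0x00FFFFFF (top 8 bits clear) and a shift amount of 8,
//   shl-after-zext:  zext to i64 gives 0x0000000000FFFFFF; shl 8 gives
//                    0x00000000FFFFFF00;
//   shl-before-zext: shl i32 gives 0xFFFFFF00; zext gives
//                    0x00000000FFFFFF00 -- identical.
// With %V = 0xFF000000 the narrow shl would discard the 0xFF byte that the
// wide shl keeps, which is exactly what MaskedValueIsZero rules out below.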
+ if (auto *ZI = dyn_cast(I.getOperand(0))) { + unsigned SrcBitWidth = ZI->getSrcTy()->getScalarSizeInBits(); + if (ShAmt < SrcBitWidth && + MaskedValueIsZero(ZI->getOperand(0), + APInt::getHighBitsSet(SrcBitWidth, ShAmt), 0, &I)) { + auto *Shl = Builder->CreateShl(ZI->getOperand(0), ShAmt); + return new ZExtInst(Shl, I.getType()); + } + } + // If the shifted-out value is known-zero, then this is a NUW shift. if (!I.hasNoUnsignedWrap() && MaskedValueIsZero(I.getOperand(0), diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 9bf638dcbae3..16e08ee58fbe 100644 --- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -481,9 +481,9 @@ private: bool processNode(DomTreeNode *Node); Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const { - if (LoadInst *LI = dyn_cast(Inst)) + if (auto *LI = dyn_cast(Inst)) return LI; - else if (StoreInst *SI = dyn_cast(Inst)) + if (auto *SI = dyn_cast(Inst)) return SI->getValueOperand(); assert(isa(Inst) && "Instruction not supported"); return TTI.getOrCreateResultFromMemIntrinsic(cast(Inst), diff --git a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp index dee61b77412e..8b8236390bf4 100644 --- a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -79,6 +79,7 @@ STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted"); STATISTIC(NumGVNBlocksDeleted, "Number of blocks deleted"); STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified"); STATISTIC(NumGVNPhisAllSame, "Number of PHIs whos arguments are all the same"); +STATISTIC(NumGVNMaxIterations, "Maximum Number of iterations it took to converge GVN"); //===----------------------------------------------------------------------===// // GVN Pass @@ -714,16 +715,15 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I, // Unlike loads, we never try to eliminate stores, so we do not check if they // are simple and avoid value numbering them. auto *SI = cast(I); - // If this store's memorydef stores the same value as the last store, the - // memory accesses are equivalent. - // Get the expression, if any, for the RHS of the MemoryDef. MemoryAccess *StoreAccess = MSSA->getMemoryAccess(SI); - MemoryAccess *StoreRHS = lookupMemoryAccessEquiv( - cast(StoreAccess)->getDefiningAccess()); - const Expression *OldStore = createStoreExpression(SI, StoreRHS, B); - // See if this store expression already has a value, and it's the same as our - // current store. FIXME: Right now, we only do this for simple stores. + // See if we are defined by a previous store expression, it already has a + // value, and it's the same value as our current store. FIXME: Right now, we + // only do this for simple stores, we should expand to cover memcpys, etc. if (SI->isSimple()) { + // Get the expression, if any, for the RHS of the MemoryDef. 
+ MemoryAccess *StoreRHS = lookupMemoryAccessEquiv( + cast(StoreAccess)->getDefiningAccess()); + const Expression *OldStore = createStoreExpression(SI, StoreRHS, B); CongruenceClass *CC = ExpressionToClass.lookup(OldStore); if (CC && CC->DefiningExpr && isa(CC->DefiningExpr) && CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B)) @@ -1092,23 +1092,16 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { if (auto *I = dyn_cast(V)) { if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) { // If this is a MemoryDef, we need to update the equivalence table. If - // we - // determined the expression is congruent to a different memory state, - // use that different memory state. If we determined it didn't, we - // update - // that as well. Note that currently, we do not guarantee the - // "different" memory state dominates us. The goal is to make things - // that are congruent look congruent, not ensure we can eliminate one in - // favor of the other. - // Right now, the only way they can be equivalent is for store - // expresions. - if (!isa(MA)) { - if (E && isa(E) && EClass->Members.size() != 1) { - auto *DefAccess = cast(E)->getDefiningAccess(); - setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr); - } else { - setMemoryAccessEquivTo(MA, nullptr); - } + // we determined the expression is congruent to a different memory + // state, use that different memory state. If we determined it didn't, + // we update that as well. Right now, we only support store + // expressions. + if (!isa(MA) && isa(E) && + EClass->Members.size() != 1) { + auto *DefAccess = cast(E)->getDefiningAccess(); + setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr); + } else { + setMemoryAccessEquivTo(MA, nullptr); } markMemoryUsersTouched(MA); } @@ -1391,7 +1384,7 @@ void NewGVN::valueNumberInstruction(Instruction *I) { } else { // Handle terminators that return values. All of them produce values we // don't currently understand. - if (!I->getType()->isVoidTy()){ + if (!I->getType()->isVoidTy()) { auto *Symbolized = createUnknownExpression(I); performCongruenceFinding(I, Symbolized); } @@ -1427,14 +1420,12 @@ void NewGVN::verifyMemoryCongruency() { continue; if (auto *FirstMUD = dyn_cast(KV.first)) { auto *SecondMUD = dyn_cast(KV.second); - if (FirstMUD && SecondMUD) { - auto *FirstInst = FirstMUD->getMemoryInst(); - auto *SecondInst = SecondMUD->getMemoryInst(); + if (FirstMUD && SecondMUD) assert( - ValueToClass.lookup(FirstInst) == ValueToClass.lookup(SecondInst) && + ValueToClass.lookup(FirstMUD->getMemoryInst()) == + ValueToClass.lookup(SecondMUD->getMemoryInst()) && "The instructions for these memory operations should have been in " "the same congruence class"); - } } else if (auto *FirstMP = dyn_cast(KV.first)) { // We can only sanely verify that MemoryDefs in the operand list all have @@ -1538,9 +1529,11 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC, initializeCongruenceClasses(F); + unsigned int Iterations = 0; // We start out in the entry block. BasicBlock *LastBlock = &F.getEntryBlock(); while (TouchedInstructions.any()) { + ++Iterations; // Walk through all the instructions in all the blocks in RPO. 
for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1; InstrNum = TouchedInstructions.find_next(InstrNum)) { @@ -1587,8 +1580,7 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC, TouchedInstructions.reset(InstrNum); } } - -// FIXME: Move this to expensive checks when we are satisfied with NewGVN + NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations); #ifndef NDEBUG verifyMemoryCongruency(); #endif @@ -2070,7 +2062,7 @@ bool NewGVN::eliminateInstructions(Function &F) { // Cleanup the congruence class. SmallPtrSet MembersLeft; - for (Value * Member : CC->Members) { + for (Value *Member : CC->Members) { if (Member->getType()->isVoidTy()) { MembersLeft.insert(Member); continue; diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index a2ceded106b4..a40079ca8e76 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -760,7 +760,7 @@ static void PropagateParallelLoopAccessMetadata(CallSite CS, /// When inlining a function that contains noalias scope metadata, /// this metadata needs to be cloned so that the inlined blocks -/// have different "unqiue scopes" at every call site. Were this not done, then +/// have different "unique scopes" at every call site. Were this not done, then /// aliasing scopes from a function inlined into a caller multiple times could /// not be differentiated (and this would lead to miscompiles because the /// non-aliasing property communicated by the metadata could have diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp index dc526a20c903..842cf31f2e3d 100644 --- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -335,10 +335,12 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); uint64_t TrueWeight, FalseWeight; - uint64_t ExitWeight = 0, BackEdgeWeight = 0; + uint64_t ExitWeight = 0, CurHeaderWeight = 0; if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) { ExitWeight = HeaderIdx ? TrueWeight : FalseWeight; - BackEdgeWeight = HeaderIdx ? FalseWeight : TrueWeight; + // The # of times the loop body executes is the sum of the exit block + // weight and the # of times the backedges are taken. + CurHeaderWeight = TrueWeight + FalseWeight; } // For each peeled-off iteration, make a copy of the loop. @@ -346,15 +348,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, SmallVector NewBlocks; ValueToValueMapTy VMap; - // The exit weight of the previous iteration is the header entry weight - // of the current iteration. So this is exactly how many dynamic iterations - // the current peeled-off static iteration uses up. + // Subtract the exit weight from the current header weight -- the exit + // weight is exactly the weight of the previous iteration's header. // FIXME: due to the way the distribution is constructed, we need a // guard here to make sure we don't end up with non-positive weights. 
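// [Editorial aside, not part of this patch] A minimal simulation of the
// weight bookkeeping above, with invented profile numbers: header weight 100
// and exit weight 10 (ten loop entries, roughly ten iterations each). Each
// peeled copy consumes one header execution per entry, and the final latch's
// backedge weight is whatever header weight remains minus one more exit.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t ExitWeight = 10, CurHeaderWeight = 100; // hypothetical profile
  for (unsigned Iter = 0; Iter != 3; ++Iter)       // peel three iterations
    CurHeaderWeight =
        ExitWeight < CurHeaderWeight ? CurHeaderWeight - ExitWeight : 1;
  assert(CurHeaderWeight == 70);
  uint64_t BackEdgeWeight =
      ExitWeight < CurHeaderWeight ? CurHeaderWeight - ExitWeight : 1;
  assert(BackEdgeWeight == 60); // weights stay positive by construction
  return 0;
}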
- if (ExitWeight < BackEdgeWeight) - BackEdgeWeight -= ExitWeight; + if (ExitWeight < CurHeaderWeight) + CurHeaderWeight -= ExitWeight; else - BackEdgeWeight = 1; + CurHeaderWeight = 1; cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit, NewBlocks, LoopBlocks, VMap, LVMap, LI); @@ -388,6 +389,14 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, // Adjust the branch weights on the loop exit. if (ExitWeight) { + // The backedge count is the difference of current header weight and + // current loop exit weight. If the current header weight is smaller than + // the current loop exit weight, we mark the loop backedge weight as 1. + uint64_t BackEdgeWeight = 0; + if (ExitWeight < CurHeaderWeight) + BackEdgeWeight = CurHeaderWeight - ExitWeight; + else + BackEdgeWeight = 1; MDBuilder MDB(LatchBR->getContext()); MDNode *WeightNode = HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight) diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 3846b21c502e..54390e77bb1f 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1574,12 +1574,20 @@ static bool sinkLastInstruction(ArrayRef Blocks) { I0->getOperandUse(O).set(NewOperands[O]); I0->moveBefore(&*BBEnd->getFirstInsertionPt()); - // Update metadata and IR flags. + // The debug location for the "common" instruction is the merged locations of + // all the commoned instructions. We start with the original location of the + // "common" instruction and iteratively merge each location in the loop below. + DILocation *Loc = I0->getDebugLoc(); + + // Update metadata and IR flags, and merge debug locations. for (auto *I : Insts) if (I != I0) { + Loc = DILocation::getMergedLocation(Loc, I->getDebugLoc()); combineMetadataForCSE(I0, I); I0->andIRFlags(I); } + if (!isa(I0)) + I0->setDebugLoc(Loc); if (!isa(I0)) { // canSinkLastInstruction checked that all instructions were used by diff --git a/contrib/llvm/tools/clang/include/clang-c/Index.h b/contrib/llvm/tools/clang/include/clang-c/Index.h index 47998859f674..e9d9ab03a8b0 100644 --- a/contrib/llvm/tools/clang/include/clang-c/Index.h +++ b/contrib/llvm/tools/clang/include/clang-c/Index.h @@ -2366,7 +2366,11 @@ enum CXCursorKind { */ CXCursor_OMPTargetTeamsDistributeParallelForDirective = 277, - CXCursor_LastStmt = CXCursor_OMPTargetTeamsDistributeParallelForDirective, + /** \brief OpenMP target teams distribute parallel for simd directive. + */ + CXCursor_OMPTargetTeamsDistributeParallelForSimdDirective = 278, + + CXCursor_LastStmt = CXCursor_OMPTargetTeamsDistributeParallelForSimdDirective, /** * \brief Cursor that represents the translation unit itself. diff --git a/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h b/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h index c0b0400cb88c..cbf0bee69f00 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/contrib/llvm/tools/clang/include/clang/AST/RecursiveASTVisitor.h @@ -2669,6 +2669,9 @@ DEF_TRAVERSE_STMT(OMPTargetTeamsDistributeDirective, DEF_TRAVERSE_STMT(OMPTargetTeamsDistributeParallelForDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) +DEF_TRAVERSE_STMT(OMPTargetTeamsDistributeParallelForSimdDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + // OpenMP clauses. 
template bool RecursiveASTVisitor::TraverseOMPClause(OMPClause *C) { diff --git a/contrib/llvm/tools/clang/include/clang/AST/StmtOpenMP.h b/contrib/llvm/tools/clang/include/clang/AST/StmtOpenMP.h index 80300dae80df..61cae7b6d258 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/StmtOpenMP.h +++ b/contrib/llvm/tools/clang/include/clang/AST/StmtOpenMP.h @@ -3638,6 +3638,79 @@ public: } }; +/// This represents '#pragma omp target teams distribute parallel for simd' +/// combined directive. +/// +/// \code +/// #pragma omp target teams distribute parallel for simd private(x) +/// \endcode +/// In this example directive '#pragma omp target teams distribute parallel +/// for simd' has clause 'private' with the variables 'x' +/// +class OMPTargetTeamsDistributeParallelForSimdDirective final + : public OMPLoopDirective { + friend class ASTStmtReader; + + /// Build directive with the given start and end location. + /// + /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending location of the directive. + /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. + /// + OMPTargetTeamsDistributeParallelForSimdDirective(SourceLocation StartLoc, + SourceLocation EndLoc, + unsigned CollapsedNum, + unsigned NumClauses) + : OMPLoopDirective(this, + OMPTargetTeamsDistributeParallelForSimdDirectiveClass, + OMPD_target_teams_distribute_parallel_for_simd, + StartLoc, EndLoc, CollapsedNum, NumClauses) {} + + /// Build an empty directive. + /// + /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. + /// + explicit OMPTargetTeamsDistributeParallelForSimdDirective( + unsigned CollapsedNum, unsigned NumClauses) + : OMPLoopDirective( + this, OMPTargetTeamsDistributeParallelForSimdDirectiveClass, + OMPD_target_teams_distribute_parallel_for_simd, SourceLocation(), + SourceLocation(), CollapsedNum, NumClauses) {} + +public: + /// Creates directive with a list of \a Clauses. + /// + /// \param C AST context. + /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending Location of the directive. + /// \param CollapsedNum Number of collapsed loops. + /// \param Clauses List of clauses. + /// \param AssociatedStmt Statement, associated with the directive. + /// \param Exprs Helper expressions for CodeGen. + /// + static OMPTargetTeamsDistributeParallelForSimdDirective * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, ArrayRef Clauses, + Stmt *AssociatedStmt, const HelperExprs &Exprs); + + /// Creates an empty directive with the place for \a NumClauses clauses. + /// + /// \param C AST context. + /// \param CollapsedNum Number of collapsed nested loops. + /// \param NumClauses Number of clauses. 
+ /// + static OMPTargetTeamsDistributeParallelForSimdDirective * + CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum, + EmptyShell); + + static bool classof(const Stmt *T) { + return T->getStmtClass() == + OMPTargetTeamsDistributeParallelForSimdDirectiveClass; + } +}; + } // end namespace clang #endif diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index c4e4e2b60192..7f249c434ad6 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3921,6 +3921,8 @@ def ext_ms_deref_template_argument: ExtWarn< def ext_ms_delayed_template_argument: ExtWarn< "using the undeclared type %0 as a default template argument is a " "Microsoft extension">, InGroup; +def err_template_arg_deduced_incomplete_pack : Error< + "deduced incomplete pack %0 for template parameter %1">; // C++ template specialization def err_template_spec_unknown_kind : Error< diff --git a/contrib/llvm/tools/clang/include/clang/Basic/OpenMPKinds.def b/contrib/llvm/tools/clang/include/clang/Basic/OpenMPKinds.def index 808e0d2bb0c7..58b54ce0bcd6 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/OpenMPKinds.def +++ b/contrib/llvm/tools/clang/include/clang/Basic/OpenMPKinds.def @@ -162,6 +162,9 @@ #ifndef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE #define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(Name) #endif +#ifndef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE +#define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(Name) +#endif // OpenMP directives. OPENMP_DIRECTIVE(threadprivate) @@ -214,6 +217,7 @@ OPENMP_DIRECTIVE_EXT(teams_distribute_parallel_for, "teams distribute parallel f OPENMP_DIRECTIVE_EXT(target_teams, "target teams") OPENMP_DIRECTIVE_EXT(target_teams_distribute, "target teams distribute") OPENMP_DIRECTIVE_EXT(target_teams_distribute_parallel_for, "target teams distribute parallel for") +OPENMP_DIRECTIVE_EXT(target_teams_distribute_parallel_for_simd, "target teams distribute parallel for simd") // OpenMP clauses. OPENMP_CLAUSE(if, OMPIfClause) @@ -793,6 +797,33 @@ OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(proc_bind) OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(schedule) OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(linear) +// Clauses allowed for OpenMP directive +// 'target teams distribute parallel for simd'. 
+OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(if) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(device) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(map) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(private) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(nowait) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(depend) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(defaultmap) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(firstprivate) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(is_device_ptr) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(default) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(shared) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(reduction) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(num_teams) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(thread_limit) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(lastprivate) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(collapse) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(dist_schedule) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(num_threads) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(proc_bind) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(schedule) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(linear) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(aligned) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(safelen) +OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(simdlen) + #undef OPENMP_TASKLOOP_SIMD_CLAUSE #undef OPENMP_TASKLOOP_CLAUSE #undef OPENMP_LINEAR_KIND @@ -843,3 +874,4 @@ OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(linear) #undef OPENMP_TARGET_TEAMS_CLAUSE #undef OPENMP_TARGET_TEAMS_DISTRIBUTE_CLAUSE #undef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE +#undef OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE diff --git a/contrib/llvm/tools/clang/include/clang/Basic/StmtNodes.td b/contrib/llvm/tools/clang/include/clang/Basic/StmtNodes.td index d8eed553d86e..2e92e5006ff4 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/StmtNodes.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/StmtNodes.td @@ -243,3 +243,4 @@ def OMPTeamsDistributeParallelForDirective : DStmt; def OMPTargetTeamsDirective : DStmt; def OMPTargetTeamsDistributeDirective : DStmt; def OMPTargetTeamsDistributeParallelForDirective : DStmt; +def OMPTargetTeamsDistributeParallelForSimdDirective : DStmt; diff --git a/contrib/llvm/tools/clang/include/clang/Driver/Options.td b/contrib/llvm/tools/clang/include/clang/Driver/Options.td index 1f1222e10636..7f4e59a2d233 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/Options.td +++ b/contrib/llvm/tools/clang/include/clang/Driver/Options.td @@ -882,7 +882,7 @@ def fms_compatibility_version "(default))">; def fdelayed_template_parsing : Flag<["-"], "fdelayed-template-parsing">, Group, HelpText<"Parse templated function definitions at the end of the " - "translation unit">, Flags<[CC1Option]>; + "translation unit">, Flags<[CC1Option, CoreOption]>; def fms_memptr_rep_EQ : Joined<["-"], "fms-memptr-rep=">, Group, Flags<[CC1Option]>; def fmodules_cache_path : Joined<["-"], "fmodules-cache-path=">, Group, Flags<[DriverOption, CC1Option]>, MetaVarName<"">, @@ -1031,7 +1031,8 @@ def fno_ms_extensions : Flag<["-"], "fno-ms-extensions">, Group, Flags<[CoreOption]>; def fno_ms_compatibility : Flag<["-"], "fno-ms-compatibility">, Group, 
   Flags<[CoreOption]>;
-def fno_delayed_template_parsing : Flag<["-"], "fno-delayed-template-parsing">, Group<f_Group>;
+def fno_delayed_template_parsing : Flag<["-"], "fno-delayed-template-parsing">, Group<f_Group>,
+  Flags<[DriverOption, CoreOption]>;
 def fno_objc_exceptions: Flag<["-"], "fno-objc-exceptions">, Group<f_Group>;
 def fno_objc_legacy_dispatch : Flag<["-"], "fno-objc-legacy-dispatch">, Group<f_Group>;
 def fno_objc_weak : Flag<["-"], "fno-objc-weak">, Group<f_Group>, Flags<[CC1Option]>;
@@ -1331,6 +1332,12 @@ def funique_section_names : Flag <["-"], "funique-section-names">,
   Group<f_Group>, Flags<[CC1Option]>;
 def fno_unique_section_names : Flag <["-"], "fno-unique-section-names">,
   Group<f_Group>, Flags<[CC1Option]>;
+def fstrict_return : Flag<["-"], "fstrict-return">, Group<f_Group>,
+  Flags<[CC1Option]>,
+  HelpText<"Always treat control flow paths that fall off the end of a "
+           "non-void function as unreachable">;
+def fno_strict_return : Flag<["-"], "fno-strict-return">, Group<f_Group>,
+  Flags<[CC1Option]>;
 def fdebug_types_section: Flag <["-"], "fdebug-types-section">, Group<f_Group>,
   Flags<[CC1Option]>, HelpText<"Place debug types in their own section (ELF Only)">;
diff --git a/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def b/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def
index 1f0c83b5bfa7..54c9f81265a6 100644
--- a/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def
+++ b/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def
@@ -251,6 +251,10 @@ CODEGENOPT(DiagnosticsWithHotness, 1, 0)
 /// Whether copy relocations support is available when building as PIE.
 CODEGENOPT(PIECopyRelocations, 1, 0)

+/// Whether we should use the undefined behaviour optimization for control flow
+/// paths that reach the end of a function without executing a required return.
+CODEGENOPT(StrictReturn, 1, 1)
+
 #undef CODEGENOPT
 #undef ENUM_CODEGENOPT
 #undef VALUE_CODEGENOPT
diff --git a/contrib/llvm/tools/clang/include/clang/Sema/Sema.h b/contrib/llvm/tools/clang/include/clang/Sema/Sema.h
index 82caaeb24ae7..f0999f68470c 100644
--- a/contrib/llvm/tools/clang/include/clang/Sema/Sema.h
+++ b/contrib/llvm/tools/clang/include/clang/Sema/Sema.h
@@ -8514,6 +8514,12 @@ public:
       ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
       SourceLocation EndLoc,
       llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA);
+  /// Called on well-formed '\#pragma omp target teams distribute parallel for
+  /// simd' after parsing of the associated statement.
+  StmtResult ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective(
+      ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
+      SourceLocation EndLoc,
+      llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA);

   /// Checks correctness of linear modifiers.
 bool CheckOpenMPLinearModifier(OpenMPLinearClauseKind LinKind,
diff --git a/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h b/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h
index 861fe64096af..61e2f18045ea 100644
--- a/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/contrib/llvm/tools/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1516,6 +1516,7 @@ namespace clang {
       STMT_OMP_TARGET_TEAMS_DIRECTIVE,
       STMT_OMP_TARGET_TEAMS_DISTRIBUTE_DIRECTIVE,
       STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE,
+      STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_DIRECTIVE,
       EXPR_OMP_ARRAY_SECTION,

       // ARC
diff --git a/contrib/llvm/tools/clang/include/clang/Tooling/Core/Diagnostic.h b/contrib/llvm/tools/clang/include/clang/Tooling/Core/Diagnostic.h
new file mode 100644
index 000000000000..d657f16df183
--- /dev/null
+++ b/contrib/llvm/tools/clang/include/clang/Tooling/Core/Diagnostic.h
@@ -0,0 +1,100 @@
+//===--- Diagnostic.h - Framework for clang diagnostics tools --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  \file
+//  Structures supporting diagnostics and refactorings that span multiple
+//  translation units. They indicate diagnostic reports and replacement
+//  suggestions for the analyzed sources.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_CORE_DIAGNOSTIC_H
+#define LLVM_CLANG_TOOLING_CORE_DIAGNOSTIC_H
+
+#include "Replacement.h"
+#include "clang/Basic/Diagnostic.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include <string>
+
+namespace clang {
+namespace tooling {
+
+/// \brief Represents a diagnostic message with the associated error message
+/// and information on the location of the problem.
+struct DiagnosticMessage {
+  DiagnosticMessage(llvm::StringRef Message = "");
+
+  /// \brief Constructs a diagnostic message with an offset to the diagnostic
+  /// within the file where the problem occurred.
+  ///
+  /// \param Loc Should be a file location, it is not meaningful for a macro
+  /// location.
+  ///
+  DiagnosticMessage(llvm::StringRef Message, const SourceManager &Sources,
+                    SourceLocation Loc);
+  std::string Message;
+  std::string FilePath;
+  unsigned FileOffset;
+};
+
+/// \brief Represents the diagnostic with the level of severity and possible
+/// fixes to be applied.
+struct Diagnostic {
+  enum Level {
+    Warning = DiagnosticsEngine::Warning,
+    Error = DiagnosticsEngine::Error
+  };
+
+  Diagnostic() = default;
+
+  Diagnostic(llvm::StringRef DiagnosticName, Level DiagLevel,
+             StringRef BuildDirectory);
+
+  Diagnostic(llvm::StringRef DiagnosticName, DiagnosticMessage &Message,
+             llvm::StringMap<Replacements> &Fix,
+             SmallVector<DiagnosticMessage, 1> &Notes, Level DiagLevel,
+             llvm::StringRef BuildDirectory);
+
+  /// \brief Name identifying the Diagnostic.
+  std::string DiagnosticName;
+
+  /// \brief Message associated with the diagnostic.
+  DiagnosticMessage Message;
+
+  /// \brief Fixes to apply, grouped by file path.
+  llvm::StringMap<Replacements> Fix;
+
+  /// \brief Potential notes about the diagnostic.
+  SmallVector<DiagnosticMessage, 1> Notes;
+
+  /// \brief Diagnostic level. Can indicate either an error or a warning.
+  Level DiagLevel;
+
+  /// \brief A build directory of the diagnostic source file.
+  ///
+  /// It's an absolute path which is the `directory` field of the source file
+  /// in the compilation database. If users don't specify the compilation
+  /// database directory, it is the current directory where clang-tidy runs.
+  ///
+  /// Note: it is empty in unit tests.
+  std::string BuildDirectory;
+};
+
+/// \brief Collection of Diagnostics generated from a single translation unit.
+struct TranslationUnitDiagnostics {
+  /// Name of the main source for the translation unit.
+  std::string MainSourceFile;
+  std::vector<Diagnostic> Diagnostics;
+};
+
+} // end namespace tooling
+} // end namespace clang
+#endif // LLVM_CLANG_TOOLING_CORE_DIAGNOSTIC_H
diff --git a/contrib/llvm/tools/clang/include/clang/Tooling/Core/Replacement.h b/contrib/llvm/tools/clang/include/clang/Tooling/Core/Replacement.h
index 95dc3cd6e763..8d4a22adf368 100644
--- a/contrib/llvm/tools/clang/include/clang/Tooling/Core/Replacement.h
+++ b/contrib/llvm/tools/clang/include/clang/Tooling/Core/Replacement.h
@@ -329,12 +329,6 @@ llvm::Expected<std::string> applyAllReplacements(StringRef Code,
 struct TranslationUnitReplacements {
   /// Name of the main source for the translation unit.
   std::string MainSourceFile;
-
-  /// A freeform chunk of text to describe the context of the replacements.
-  /// Will be printed, for example, when detecting conflicts during replacement
-  /// deduplication.
-  std::string Context;
-
   std::vector<Replacement> Replacements;
 };
diff --git a/contrib/llvm/tools/clang/include/clang/Tooling/DiagnosticsYaml.h b/contrib/llvm/tools/clang/include/clang/Tooling/DiagnosticsYaml.h
new file mode 100644
index 000000000000..f32b9fa9c94b
--- /dev/null
+++ b/contrib/llvm/tools/clang/include/clang/Tooling/DiagnosticsYaml.h
@@ -0,0 +1,101 @@
+//===-- DiagnosticsYaml.h -- Serialization for Diagnostics ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines the structure of a YAML document for serializing
+/// diagnostics.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_DIAGNOSTICSYAML_H
+#define LLVM_CLANG_TOOLING_DIAGNOSTICSYAML_H
+
+#include "clang/Tooling/Core/Diagnostic.h"
+#include "clang/Tooling/ReplacementsYaml.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <string>
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(clang::tooling::Diagnostic)
+
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<clang::tooling::Diagnostic> {
+  /// \brief Helper to (de)serialize a Diagnostic since we don't have direct
+  /// access to its data members.
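// The helper below relies on llvm::yaml's generic normalization idiom: when a
// type can't be mapped field-by-field, MappingTraits routes I/O through a
// proxy object that can, converting on the way in and out. A self-contained
// sketch with a toy type (Celsius and NormalizedFahrenheit are hypothetical;
// only the YAMLTraits machinery itself is real):
//
//   #include "llvm/Support/YAMLTraits.h"
//
//   struct Celsius { double Degrees; };
//
//   namespace llvm {
//   namespace yaml {
//   template <> struct MappingTraits<Celsius> {
//     struct NormalizedFahrenheit {
//       NormalizedFahrenheit(const IO &) : Degrees(0) {}
//       NormalizedFahrenheit(const IO &, const Celsius &C)
//           : Degrees(C.Degrees * 9 / 5 + 32) {}
//       Celsius denormalize(const IO &) { return {(Degrees - 32) * 5 / 9}; }
//       double Degrees;
//     };
//     static void mapping(IO &Io, Celsius &C) {
//       // Converts to the proxy before writing and back after reading.
//       MappingNormalization<NormalizedFahrenheit, Celsius> Keys(Io, C);
//       Io.mapRequired("fahrenheit", Keys->Degrees);
//     }
//   };
//   } // end namespace yaml
//   } // end namespace llvm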
+  class NormalizedDiagnostic {
+  public:
+    NormalizedDiagnostic(const IO &)
+        : DiagLevel(clang::tooling::Diagnostic::Level::Warning) {}
+
+    NormalizedDiagnostic(const IO &, const clang::tooling::Diagnostic &D)
+        : DiagnosticName(D.DiagnosticName), Message(D.Message), Fix(D.Fix),
+          Notes(D.Notes), DiagLevel(D.DiagLevel),
+          BuildDirectory(D.BuildDirectory) {}
+
+    clang::tooling::Diagnostic denormalize(const IO &) {
+      return clang::tooling::Diagnostic(DiagnosticName, Message, Fix, Notes,
+                                        DiagLevel, BuildDirectory);
+    }
+
+    std::string DiagnosticName;
+    clang::tooling::DiagnosticMessage Message;
+    llvm::StringMap<clang::tooling::Replacements> Fix;
+    SmallVector<clang::tooling::DiagnosticMessage, 1> Notes;
+    clang::tooling::Diagnostic::Level DiagLevel;
+    std::string BuildDirectory;
+  };
+
+  static void mapping(IO &Io, clang::tooling::Diagnostic &D) {
+    MappingNormalization<NormalizedDiagnostic, clang::tooling::Diagnostic> Keys(
+        Io, D);
+    Io.mapRequired("DiagnosticName", Keys->DiagnosticName);
+
+    // FIXME: Export properly all the different fields.
+
+    std::vector<clang::tooling::Replacement> Fixes;
+    for (auto &Replacements : Keys->Fix) {
+      for (auto &Replacement : Replacements.second) {
+        Fixes.push_back(Replacement);
+      }
+    }
+    Io.mapRequired("Replacements", Fixes);
+    for (auto &Fix : Fixes) {
+      llvm::Error Err = Keys->Fix[Fix.getFilePath()].add(Fix);
+      if (Err) {
+        // FIXME: Implement better conflict handling.
+        llvm::errs() << "Fix conflicts with existing fix: "
+                     << llvm::toString(std::move(Err)) << "\n";
+      }
+    }
+  }
+};
+
+/// \brief Specialized MappingTraits to describe how a
+/// TranslationUnitDiagnostics is (de)serialized.
+template <> struct MappingTraits<clang::tooling::TranslationUnitDiagnostics> {
+  static void mapping(IO &Io, clang::tooling::TranslationUnitDiagnostics &Doc) {
+    Io.mapRequired("MainSourceFile", Doc.MainSourceFile);
+
+    std::vector<clang::tooling::Diagnostic> Diagnostics;
+    for (auto &Diagnostic : Doc.Diagnostics) {
+      // FIXME: Export all diagnostics, not just the ones with fixes.
+      // Update MappingTraits<clang::tooling::Diagnostic>::mapping.
+      if (Diagnostic.Fix.size() > 0) {
+        Diagnostics.push_back(Diagnostic);
+      }
+    }
+    Io.mapRequired("Diagnostics", Diagnostics);
+    Doc.Diagnostics = Diagnostics;
+  }
+};
+} // end namespace yaml
+} // end namespace llvm
+
+#endif // LLVM_CLANG_TOOLING_DIAGNOSTICSYAML_H
diff --git a/contrib/llvm/tools/clang/include/clang/Tooling/ReplacementsYaml.h b/contrib/llvm/tools/clang/include/clang/Tooling/ReplacementsYaml.h
index 47b7f3f9a534..0b1dc4c77423 100644
--- a/contrib/llvm/tools/clang/include/clang/Tooling/ReplacementsYaml.h
+++ b/contrib/llvm/tools/clang/include/clang/Tooling/ReplacementsYaml.h
@@ -65,7 +65,6 @@ template <> struct MappingTraits<clang::tooling::TranslationUnitReplacements> {
   static void mapping(IO &Io,
                       clang::tooling::TranslationUnitReplacements &Doc) {
     Io.mapRequired("MainSourceFile", Doc.MainSourceFile);
-    Io.mapOptional("Context", Doc.Context, std::string());
     Io.mapRequired("Replacements", Doc.Replacements);
   }
 };
diff --git a/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp b/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp
index b3f8925b6464..6dcb705c44d3 100644
--- a/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp
+++ b/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp
@@ -7192,6 +7192,12 @@ static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc,
                                CharUnits &EndOffset) {
   bool DetermineForCompleteObject = refersToCompleteObject(LVal);

+  auto CheckedHandleSizeof = [&](QualType Ty, CharUnits &Result) {
+    if (Ty.isNull() || Ty->isIncompleteType() || Ty->isFunctionType())
+      return false;
+    return HandleSizeof(Info, ExprLoc, Ty, Result);
+  };
+
   // We want to evaluate the size of the entire object.
This is a valid fallback // for when Type=1 and the designator is invalid, because we're asked for an // upper-bound. @@ -7209,7 +7215,7 @@ static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc, return false; QualType BaseTy = getObjectType(LVal.getLValueBase()); - return !BaseTy.isNull() && HandleSizeof(Info, ExprLoc, BaseTy, EndOffset); + return CheckedHandleSizeof(BaseTy, EndOffset); } // We want to evaluate the size of a subobject. @@ -7238,7 +7244,7 @@ static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc, } CharUnits BytesPerElem; - if (!HandleSizeof(Info, ExprLoc, Designator.MostDerivedType, BytesPerElem)) + if (!CheckedHandleSizeof(Designator.MostDerivedType, BytesPerElem)) return false; // According to the GCC documentation, we want the size of the subobject diff --git a/contrib/llvm/tools/clang/lib/AST/StmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/AST/StmtOpenMP.cpp index 0a90740162b9..a7c71bb5f45c 100644 --- a/contrib/llvm/tools/clang/lib/AST/StmtOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/AST/StmtOpenMP.cpp @@ -1659,3 +1659,64 @@ OMPTargetTeamsDistributeParallelForDirective::CreateEmpty(const ASTContext &C, return new (Mem) OMPTargetTeamsDistributeParallelForDirective(CollapsedNum, NumClauses); } + +OMPTargetTeamsDistributeParallelForSimdDirective * +OMPTargetTeamsDistributeParallelForSimdDirective::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + unsigned CollapsedNum, ArrayRef Clauses, Stmt *AssociatedStmt, + const HelperExprs &Exprs) { + auto Size = + llvm::alignTo(sizeof(OMPTargetTeamsDistributeParallelForSimdDirective), + alignof(OMPClause *)); + void *Mem = C.Allocate( + Size + sizeof(OMPClause *) * Clauses.size() + + sizeof(Stmt *) * + numLoopChildren(CollapsedNum, + OMPD_target_teams_distribute_parallel_for_simd)); + OMPTargetTeamsDistributeParallelForSimdDirective *Dir = + new (Mem) OMPTargetTeamsDistributeParallelForSimdDirective( + StartLoc, EndLoc, CollapsedNum, Clauses.size()); + Dir->setClauses(Clauses); + Dir->setAssociatedStmt(AssociatedStmt); + Dir->setIterationVariable(Exprs.IterationVarRef); + Dir->setLastIteration(Exprs.LastIteration); + Dir->setCalcLastIteration(Exprs.CalcLastIteration); + Dir->setPreCond(Exprs.PreCond); + Dir->setCond(Exprs.Cond); + Dir->setInit(Exprs.Init); + Dir->setInc(Exprs.Inc); + Dir->setIsLastIterVariable(Exprs.IL); + Dir->setLowerBoundVariable(Exprs.LB); + Dir->setUpperBoundVariable(Exprs.UB); + Dir->setStrideVariable(Exprs.ST); + Dir->setEnsureUpperBound(Exprs.EUB); + Dir->setNextLowerBound(Exprs.NLB); + Dir->setNextUpperBound(Exprs.NUB); + Dir->setNumIterations(Exprs.NumIterations); + Dir->setPrevLowerBoundVariable(Exprs.PrevLB); + Dir->setPrevUpperBoundVariable(Exprs.PrevUB); + Dir->setCounters(Exprs.Counters); + Dir->setPrivateCounters(Exprs.PrivateCounters); + Dir->setInits(Exprs.Inits); + Dir->setUpdates(Exprs.Updates); + Dir->setFinals(Exprs.Finals); + Dir->setPreInits(Exprs.PreInits); + return Dir; +} + +OMPTargetTeamsDistributeParallelForSimdDirective * +OMPTargetTeamsDistributeParallelForSimdDirective::CreateEmpty( + const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum, + EmptyShell) { + auto Size = + llvm::alignTo(sizeof(OMPTargetTeamsDistributeParallelForSimdDirective), + alignof(OMPClause *)); + void *Mem = C.Allocate( + Size + sizeof(OMPClause *) * NumClauses + + sizeof(Stmt *) * + numLoopChildren(CollapsedNum, + OMPD_target_teams_distribute_parallel_for_simd)); + return new (Mem) OMPTargetTeamsDistributeParallelForSimdDirective( + 
CollapsedNum, NumClauses); +} + diff --git a/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp b/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp index a8f493dca07d..a9c64c3ba6ae 100644 --- a/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp +++ b/contrib/llvm/tools/clang/lib/AST/StmtPrinter.cpp @@ -1244,6 +1244,12 @@ void StmtPrinter::VisitOMPTargetTeamsDistributeParallelForDirective( PrintOMPExecutableDirective(Node); } +void StmtPrinter::VisitOMPTargetTeamsDistributeParallelForSimdDirective( + OMPTargetTeamsDistributeParallelForSimdDirective *Node) { + Indent() << "#pragma omp target teams distribute parallel for simd "; + PrintOMPExecutableDirective(Node); +} + //===----------------------------------------------------------------------===// // Expr printing methods. //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp b/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp index dd59a9b96c98..df36bf06b843 100644 --- a/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp +++ b/contrib/llvm/tools/clang/lib/AST/StmtProfile.cpp @@ -763,6 +763,11 @@ void StmtProfiler::VisitOMPTargetTeamsDistributeParallelForDirective( VisitOMPLoopDirective(S); } +void StmtProfiler::VisitOMPTargetTeamsDistributeParallelForSimdDirective( + const OMPTargetTeamsDistributeParallelForSimdDirective *S) { + VisitOMPLoopDirective(S); +} + void StmtProfiler::VisitExpr(const Expr *S) { VisitStmt(S); } diff --git a/contrib/llvm/tools/clang/lib/Analysis/CFG.cpp b/contrib/llvm/tools/clang/lib/Analysis/CFG.cpp index bf3cc05cdb6e..a1a463f1d037 100644 --- a/contrib/llvm/tools/clang/lib/Analysis/CFG.cpp +++ b/contrib/llvm/tools/clang/lib/Analysis/CFG.cpp @@ -1690,15 +1690,19 @@ CFGBuilder::VisitLogicalOperator(BinaryOperator *B, // we have been provided. ExitBlock = RHSBlock = createBlock(false); + // Even though KnownVal is only used in the else branch of the next + // conditional, tryEvaluateBool performs additional checking on the + // Expr, so it should be called unconditionally. 
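// Concretely, KnownVal lets the builder prune one outgoing edge of the RHS
// block when the right-hand side (or the whole expression) folds to a
// constant. A small function that exercises this path (illustrative only,
// not part of the patch):
//
//   void cfg_example(int x, int *p) {
//     if (x > 0 && 0) // RHS folds to 'false', so the true successor of the
//       *p = 1;       // RHS block is recorded as unreachable via KnownVal
//   }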
+ TryResult KnownVal = tryEvaluateBool(RHS); + if (!KnownVal.isKnown()) + KnownVal = tryEvaluateBool(B); + if (!Term) { assert(TrueBlock == FalseBlock); addSuccessor(RHSBlock, TrueBlock); } else { RHSBlock->setTerminator(Term); - TryResult KnownVal = tryEvaluateBool(RHS); - if (!KnownVal.isKnown()) - KnownVal = tryEvaluateBool(B); addSuccessor(RHSBlock, TrueBlock, !KnownVal.isFalse()); addSuccessor(RHSBlock, FalseBlock, !KnownVal.isTrue()); } diff --git a/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp b/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp index 4675995ea722..7bd1f8762bff 100644 --- a/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/OpenMPKinds.cpp @@ -685,6 +685,16 @@ bool clang::isAllowedClauseForDirective(OpenMPDirectiveKind DKind, #define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_CLAUSE(Name) \ case OMPC_##Name: \ return true; +#include "clang/Basic/OpenMPKinds.def" + default: + break; + } + break; + case OMPD_target_teams_distribute_parallel_for_simd: + switch (CKind) { +#define OPENMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_CLAUSE(Name) \ + case OMPC_##Name: \ + return true; #include "clang/Basic/OpenMPKinds.def" default: break; @@ -721,7 +731,8 @@ bool clang::isOpenMPLoopDirective(OpenMPDirectiveKind DKind) { DKind == OMPD_teams_distribute_parallel_for_simd || DKind == OMPD_teams_distribute_parallel_for || DKind == OMPD_target_teams_distribute || - DKind == OMPD_target_teams_distribute_parallel_for; + DKind == OMPD_target_teams_distribute_parallel_for || + DKind == OMPD_target_teams_distribute_parallel_for_simd; } bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) { @@ -735,8 +746,8 @@ bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) { DKind == OMPD_target_parallel_for_simd || DKind == OMPD_teams_distribute_parallel_for_simd || DKind == OMPD_teams_distribute_parallel_for || - DKind == OMPD_target_teams_distribute_parallel_for; - // TODO add next directives. + DKind == OMPD_target_teams_distribute_parallel_for || + DKind == OMPD_target_teams_distribute_parallel_for_simd; } bool clang::isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind) { @@ -752,8 +763,8 @@ bool clang::isOpenMPParallelDirective(OpenMPDirectiveKind DKind) { DKind == OMPD_target_parallel_for_simd || DKind == OMPD_teams_distribute_parallel_for || DKind == OMPD_teams_distribute_parallel_for_simd || - DKind == OMPD_target_teams_distribute_parallel_for; - // TODO add next directives. 
+ DKind == OMPD_target_teams_distribute_parallel_for || + DKind == OMPD_target_teams_distribute_parallel_for_simd; } bool clang::isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind) { @@ -761,7 +772,8 @@ bool clang::isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind) { DKind == OMPD_target_parallel_for || DKind == OMPD_target_parallel_for_simd || DKind == OMPD_target_simd || DKind == OMPD_target_teams || DKind == OMPD_target_teams_distribute || - DKind == OMPD_target_teams_distribute_parallel_for; + DKind == OMPD_target_teams_distribute_parallel_for || + DKind == OMPD_target_teams_distribute_parallel_for_simd; } bool clang::isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind) { @@ -779,7 +791,8 @@ bool clang::isOpenMPNestingTeamsDirective(OpenMPDirectiveKind DKind) { bool clang::isOpenMPTeamsDirective(OpenMPDirectiveKind DKind) { return isOpenMPNestingTeamsDirective(DKind) || DKind == OMPD_target_teams || DKind == OMPD_target_teams_distribute || - DKind == OMPD_target_teams_distribute_parallel_for; + DKind == OMPD_target_teams_distribute_parallel_for || + DKind == OMPD_target_teams_distribute_parallel_for_simd; } bool clang::isOpenMPSimdDirective(OpenMPDirectiveKind DKind) { @@ -788,8 +801,8 @@ bool clang::isOpenMPSimdDirective(OpenMPDirectiveKind DKind) { DKind == OMPD_distribute_parallel_for_simd || DKind == OMPD_distribute_simd || DKind == OMPD_target_simd || DKind == OMPD_teams_distribute_simd || - DKind == OMPD_teams_distribute_parallel_for_simd; - // TODO add next directives. + DKind == OMPD_teams_distribute_parallel_for_simd || + DKind == OMPD_target_teams_distribute_parallel_for_simd; } bool clang::isOpenMPNestingDistributeDirective(OpenMPDirectiveKind Kind) { @@ -805,7 +818,8 @@ bool clang::isOpenMPDistributeDirective(OpenMPDirectiveKind Kind) { Kind == OMPD_teams_distribute_parallel_for_simd || Kind == OMPD_teams_distribute_parallel_for || Kind == OMPD_target_teams_distribute || - Kind == OMPD_target_teams_distribute_parallel_for; + Kind == OMPD_target_teams_distribute_parallel_for || + Kind == OMPD_target_teams_distribute_parallel_for_simd; } bool clang::isOpenMPPrivate(OpenMPClauseKind Kind) { @@ -830,5 +844,6 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) { Kind == OMPD_teams_distribute_parallel_for_simd || Kind == OMPD_teams_distribute_parallel_for || Kind == OMPD_target_teams_distribute || - Kind == OMPD_target_teams_distribute_parallel_for; + Kind == OMPD_target_teams_distribute_parallel_for || + Kind == OMPD_target_teams_distribute_parallel_for_simd; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 451f9e9221ad..fe0e2acdfdbf 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -20,53 +20,64 @@ using namespace clang; using namespace CodeGen; -/// \brief Get the GPU warp size. -llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXWarpSize(CodeGenFunction &CGF) { +namespace { +enum OpenMPRTLFunctionNVPTX { + /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle, + /// kmp_int32 thread_limit); + OMPRTL_NVPTX__kmpc_kernel_init, +}; + +// NVPTX Address space +enum AddressSpace { + AddressSpaceShared = 3, +}; +} // namespace + +/// Get the GPU warp size. 
+static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; return Bld.CreateCall( llvm::Intrinsic::getDeclaration( - &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize), + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize), llvm::None, "nvptx_warp_size"); } -/// \brief Get the id of the current thread on the GPU. -llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXThreadID(CodeGenFunction &CGF) { +/// Get the id of the current thread on the GPU. +static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; return Bld.CreateCall( llvm::Intrinsic::getDeclaration( - &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x), + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x), llvm::None, "nvptx_tid"); } -// \brief Get the maximum number of threads in a block of the GPU. -llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXNumThreads(CodeGenFunction &CGF) { +/// Get the maximum number of threads in a block of the GPU. +static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; return Bld.CreateCall( llvm::Intrinsic::getDeclaration( - &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x), + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x), llvm::None, "nvptx_num_threads"); } -/// \brief Get barrier to synchronize all threads in a block. -void CGOpenMPRuntimeNVPTX::getNVPTXCTABarrier(CodeGenFunction &CGF) { +/// Get barrier to synchronize all threads in a block. +static void getNVPTXCTABarrier(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; Bld.CreateCall(llvm::Intrinsic::getDeclaration( - &CGM.getModule(), llvm::Intrinsic::nvvm_barrier0)); + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0)); } -// \brief Synchronize all GPU threads in a block. -void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) { - getNVPTXCTABarrier(CGF); -} +/// Synchronize all GPU threads in a block. +static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); } -/// \brief Get the thread id of the OMP master thread. +/// Get the thread id of the OMP master thread. /// The master thread id is the first thread (lane) of the last warp in the /// GPU block. Warp size is assumed to be some power of 2. /// Thread id is 0 indexed. /// E.g: If NumThreads is 33, master id is 32. /// If NumThreads is 64, master id is 32. /// If NumThreads is 1024, master id is 992. 
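// The computation described above is a small bitmask calculation: round
// NumThreads - 1 down to a multiple of the warp size. The same arithmetic as
// plain C++ (a sketch; assumes WarpSize is a power of two, as the comment
// requires):

static unsigned masterThreadId(unsigned NumThreads, unsigned WarpSize) {
  unsigned Mask = WarpSize - 1;    // e.g. 31 when warps have 32 lanes
  return (NumThreads - 1) & ~Mask; // first lane of the last warp
}

// masterThreadId(33, 32) == 32, masterThreadId(64, 32) == 32, and
// masterThreadId(1024, 32) == 992, matching the examples above.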
-llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) { +static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; llvm::Value *NumThreads = getNVPTXNumThreads(CGF); @@ -77,19 +88,6 @@ llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) { Bld.CreateNot(Mask), "master_tid"); } -namespace { -enum OpenMPRTLFunctionNVPTX { - /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle, - /// kmp_int32 thread_limit); - OMPRTL_NVPTX__kmpc_kernel_init, -}; - -// NVPTX Address space -enum ADDRESS_SPACE { - ADDRESS_SPACE_SHARED = 3, -}; -} // namespace - CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState( CodeGenModule &CGM) : WorkerFn(nullptr), CGFI(nullptr) { @@ -119,14 +117,14 @@ void CGOpenMPRuntimeNVPTX::initializeEnvironment() { CGM.getModule(), CGM.Int32Ty, /*isConstant=*/false, llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(CGM.Int32Ty), "__omp_num_threads", 0, - llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED); + llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared); ActiveWorkers->setAlignment(DL.getPrefTypeAlignment(CGM.Int32Ty)); WorkID = new llvm::GlobalVariable( CGM.getModule(), CGM.Int64Ty, /*isConstant=*/false, llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(CGM.Int64Ty), "__tgt_work_id", 0, - llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED); + llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared); WorkID->setAlignment(DL.getPrefTypeAlignment(CGM.Int64Ty)); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index e18d28cdda9f..a33fb27579f6 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -49,38 +49,6 @@ public: void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); private: - // - // NVPTX calls. - // - - /// \brief Get the GPU warp size. - llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF); - - /// \brief Get the id of the current thread on the GPU. - llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF); - - // \brief Get the maximum number of threads in a block of the GPU. - llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF); - - /// \brief Get barrier to synchronize all threads in a block. - void getNVPTXCTABarrier(CodeGenFunction &CGF); - - // \brief Synchronize all GPU threads in a block. - void syncCTAThreads(CodeGenFunction &CGF); - - // - // OMP calls. - // - - /// \brief Get the thread id of the OMP master thread. - /// The master thread id is the first thread (lane) of the last warp in the - /// GPU block. Warp size is assumed to be some power of 2. - /// Thread id is 0 indexed. - /// E.g: If NumThreads is 33, master id is 32. - /// If NumThreads is 64, master id is 32. - /// If NumThreads is 1024, master id is 992. - llvm::Value *getMasterThreadID(CodeGenFunction &CGF); - // // Private state and methods. 
 //
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp
index f2acb798b881..8d391f95d9f7 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp
@@ -326,6 +326,10 @@ void CodeGenFunction::EmitStmt(const Stmt *S) {
     EmitOMPTargetTeamsDistributeParallelForDirective(
         cast<OMPTargetTeamsDistributeParallelForDirective>(*S));
     break;
+  case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass:
+    EmitOMPTargetTeamsDistributeParallelForSimdDirective(
+        cast<OMPTargetTeamsDistributeParallelForSimdDirective>(*S));
+    break;
   }
 }
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
index ba39e1fbd41f..386c4f0fe69c 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2032,6 +2032,16 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
       });
 }

+void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
+    const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
+  CGM.getOpenMPRuntime().emitInlinedDirective(
+      *this, OMPD_target_teams_distribute_parallel_for_simd,
+      [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+        CGF.EmitStmt(
+            cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+      });
+}
+
 /// \brief Emit a helper variable and return corresponding lvalue.
 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                                const DeclRefExpr *Helper) {
@@ -2760,6 +2770,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
   auto &RT = CGM.getOpenMPRuntime();

+  bool HasLastprivateClause = false;
   // Check pre-condition.
   {
     OMPLoopScope PreInitScope(*this, S);
@@ -2793,6 +2804,16 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
         EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

     OMPPrivateScope LoopScope(*this);
+    if (EmitOMPFirstprivateClause(S, LoopScope)) {
+      // Emit implicit barrier to synchronize threads and avoid data races on
+      // initialization of firstprivate variables and post-update of
+      // lastprivate variables.
+      CGM.getOpenMPRuntime().emitBarrierCall(
+          *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
+          /*ForceSimpleCall=*/true);
+    }
+    EmitOMPPrivateClause(S, LoopScope);
+    HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
     EmitOMPPrivateLoopCounters(S, LoopScope);
     (void)LoopScope.Privatize();
@@ -2849,6 +2870,13 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
             LB.getAddress(), UB.getAddress(), ST.getAddress(),
             IL.getAddress(), Chunk);
       }
+
+      // Emit final copy of the lastprivate variables if IsLastIter != 0.
+      if (HasLastprivateClause)
+        EmitOMPLastprivateClauseFinal(
+            S, /*NoFinals=*/false,
+            Builder.CreateIsNotNull(
+                EmitLoadOfScalar(IL, S.getLocStart())));
     }

     // We're now done with the loop, so jump to the continuation block.
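// At the source level, the firstprivate barrier and the guarded lastprivate
// copy added above are what make a loop like the following behave
// (illustrative example, not taken from the patch):

void distribute_lastprivate(int *out, int n) {
  int last = -1;
#pragma omp target teams distribute lastprivate(last)
  for (int i = 0; i < n; ++i)
    last = i; // every team writes its own private copy ...

  // ... and only the copy from the sequentially-last iteration survives,
  // written back under the IsLastIter (IL) flag tested above.
  *out = last;
}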
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
index a954f487d1e4..7cab13de923b 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1049,6 +1049,19 @@ QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD,
   return ResTy;
 }

+static bool
+shouldUseUndefinedBehaviorReturnOptimization(const FunctionDecl *FD,
+                                             const ASTContext &Context) {
+  QualType T = FD->getReturnType();
+  // Avoid the optimization for functions that return a record type with a
+  // trivial destructor or another trivially copyable type.
+  if (const RecordType *RT = T.getCanonicalType()->getAs<RecordType>()) {
+    if (const auto *ClassDecl = dyn_cast<CXXRecordDecl>(RT->getDecl()))
+      return !ClassDecl->hasTrivialDestructor();
+  }
+  return !T.isTriviallyCopyableType(Context);
+}
+
 void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
                                    const CGFunctionInfo &FnInfo) {
   const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
@@ -1127,17 +1140,23 @@
   // function call is used by the caller, the behavior is undefined.
   if (getLangOpts().CPlusPlus && !FD->hasImplicitReturnZero() && !SawAsmBlock &&
       !FD->getReturnType()->isVoidType() && Builder.GetInsertBlock()) {
+    bool ShouldEmitUnreachable =
+        CGM.getCodeGenOpts().StrictReturn ||
+        shouldUseUndefinedBehaviorReturnOptimization(FD, getContext());
     if (SanOpts.has(SanitizerKind::Return)) {
       SanitizerScope SanScope(this);
       llvm::Value *IsFalse = Builder.getFalse();
       EmitCheck(std::make_pair(IsFalse, SanitizerKind::Return),
                 SanitizerHandler::MissingReturn,
                 EmitCheckSourceLocation(FD->getLocation()), None);
-    } else if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
-      EmitTrapCall(llvm::Intrinsic::trap);
+    } else if (ShouldEmitUnreachable) {
+      if (CGM.getCodeGenOpts().OptimizationLevel == 0)
+        EmitTrapCall(llvm::Intrinsic::trap);
+    }
+    if (SanOpts.has(SanitizerKind::Return) || ShouldEmitUnreachable) {
+      Builder.CreateUnreachable();
+      Builder.ClearInsertionPoint();
     }
-    Builder.CreateUnreachable();
-    Builder.ClearInsertionPoint();
   }

   // Emit the standard function epilogue.
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h
index 222d0e97968a..1347f54df9ac 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h
@@ -2699,6 +2699,8 @@ public:
       const OMPTargetTeamsDistributeDirective &S);
   void EmitOMPTargetTeamsDistributeParallelForDirective(
       const OMPTargetTeamsDistributeParallelForDirective &S);
+  void EmitOMPTargetTeamsDistributeParallelForSimdDirective(
+      const OMPTargetTeamsDistributeParallelForSimdDirective &S);

   /// Emit outlined function for the target directive.
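// Observable effect of the change above (sketch): falling off the end of a
// value-returning function is undefined behavior in C++, and with
// -fstrict-return (the default) that path now lowers to 'unreachable', plus a
// trap at -O0. Under -fno-strict-return the lowering is still kept when
// shouldUseUndefinedBehaviorReturnOptimization() returns true, i.e. for
// return types where falling off the end could not have worked anyway
// (records with non-trivial destructors and other non-trivially-copyable
// types).

int missing_return(int x) { // illustrative; trivially copyable return type
  if (x > 0)
    return 1;
} // falling off this closing brace is the path in question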
static std::pairgetType()); EmitCallArgs(Args, ArgTypes, ArgRange, CalleeDecl, ParamsToSkip, Order); } diff --git a/contrib/llvm/tools/clang/lib/Driver/Tools.cpp b/contrib/llvm/tools/clang/lib/Driver/Tools.cpp index 2a367bb29aa5..ea5ad7d051b6 100644 --- a/contrib/llvm/tools/clang/lib/Driver/Tools.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/Tools.cpp @@ -2235,6 +2235,15 @@ static void AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args, UseSeparateSections)) { CmdArgs.push_back("-plugin-opt=-data-sections"); } + + if (Arg *A = Args.getLastArg(options::OPT_fprofile_sample_use_EQ)) { + StringRef FName = A->getValue(); + if (!llvm::sys::fs::exists(FName)) + D.Diag(diag::err_drv_no_such_file) << FName; + else + CmdArgs.push_back( + Args.MakeArgString(Twine("-plugin-opt=sample-profile=") + FName)); + } } /// This is a helper function for validating the optional refinement step @@ -3058,6 +3067,10 @@ static void CollectArgsForIntegratedAssembler(Compilation &C, continue; } + if (C.getDefaultToolChain().getTriple().isOSBinFormatCOFF() && + Value == "-mbig-obj") + continue; // LLVM handles bigobj automatically + switch (C.getDefaultToolChain().getArch()) { default: break; @@ -4453,6 +4466,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Args.hasFlag(options::OPT_fstrict_enums, options::OPT_fno_strict_enums, false)) CmdArgs.push_back("-fstrict-enums"); + if (!Args.hasFlag(options::OPT_fstrict_return, options::OPT_fno_strict_return, + true)) + CmdArgs.push_back("-fno-strict-return"); if (Args.hasFlag(options::OPT_fstrict_vtable_pointers, options::OPT_fno_strict_vtable_pointers, false)) diff --git a/contrib/llvm/tools/clang/lib/Format/Format.cpp b/contrib/llvm/tools/clang/lib/Format/Format.cpp index 70b90d6fa14e..389761d48249 100644 --- a/contrib/llvm/tools/clang/lib/Format/Format.cpp +++ b/contrib/llvm/tools/clang/lib/Format/Format.cpp @@ -638,6 +638,9 @@ FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) { ChromiumStyle.BreakAfterJavaFieldAnnotations = true; ChromiumStyle.ContinuationIndentWidth = 8; ChromiumStyle.IndentWidth = 4; + } else if (Language == FormatStyle::LK_JavaScript) { + ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; + ChromiumStyle.AllowShortLoopsOnASingleLine = false; } else { ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp index 84e06d05c739..370cf7afa330 100644 --- a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp +++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp @@ -1255,10 +1255,13 @@ void UnwrappedLineParser::tryToParseJSFunction() { if (FormatTok->is(tok::l_brace)) tryToParseBracedList(); else - while (FormatTok->isNot(tok::l_brace) && !eof()) + while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) nextToken(); } + if (FormatTok->is(tok::semi)) + return; + parseChildBlock(); } diff --git a/contrib/llvm/tools/clang/lib/Frontend/ASTConsumers.cpp b/contrib/llvm/tools/clang/lib/Frontend/ASTConsumers.cpp index bd2ee06d1653..d8118cb30f63 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/ASTConsumers.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/ASTConsumers.cpp @@ -370,6 +370,26 @@ void DeclContextPrinter::PrintDeclContext(const DeclContext* DC, break; } + case Decl::ClassTemplateSpecialization: { + const auto *CTSD = cast(DC); + if 
(CTSD->isCompleteDefinition())
+      Out << "[class template specialization] ";
+    else
+      Out << "<class template specialization> ";
+    Out << *CTSD;
+    break;
+  }
+
+  case Decl::ClassTemplatePartialSpecialization: {
+    const auto *CTPSD = cast<ClassTemplatePartialSpecializationDecl>(DC);
+    if (CTPSD->isCompleteDefinition())
+      Out << "[class template partial specialization] ";
+    else
+      Out << "<class template partial specialization> ";
+    Out << *CTPSD;
+    break;
+  }
+
   default:
     llvm_unreachable("a decl that inherits DeclContext isn't handled");
   }
@@ -400,7 +420,8 @@ void DeclContextPrinter::PrintDeclContext(const DeclContext* DC,
     case Decl::CXXConstructor:
     case Decl::CXXDestructor:
     case Decl::CXXConversion:
-    {
+    case Decl::ClassTemplateSpecialization:
+    case Decl::ClassTemplatePartialSpecialization: {
       DeclContext* DC = cast<DeclContext>(I);
       PrintDeclContext(DC, Indentation+2);
       break;
     }
@@ -478,6 +499,37 @@ void DeclContextPrinter::PrintDeclContext(const DeclContext* DC,
       Out << " " << '"' << I << "\"\n";
       break;
     }
+    case Decl::Friend: {
+      Out << "<friend>";
+      if (const NamedDecl *ND = cast<FriendDecl>(I)->getFriendDecl())
+        Out << ' ' << *ND;
+      Out << "\n";
+      break;
+    }
+    case Decl::Using: {
+      Out << "<using> " << *cast<UsingDecl>(I) << "\n";
+      break;
+    }
+    case Decl::UsingShadow: {
+      Out << "<using shadow> " << *cast<UsingShadowDecl>(I) << "\n";
+      break;
+    }
+    case Decl::Empty: {
+      Out << "<empty>\n";
+      break;
+    }
+    case Decl::AccessSpec: {
+      Out << "<access specifier>\n";
+      break;
+    }
+    case Decl::VarTemplate: {
+      Out << "<var template> " << *cast<VarTemplateDecl>(I) << "\n";
+      break;
+    }
+    case Decl::StaticAssert: {
+      Out << "<static assert>\n";
+      break;
+    }
     default:
       Out << "DeclKind: " << DK << '"' << I << "\"\n";
       llvm_unreachable("decl unhandled");
diff --git a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp
index a0682e26e702..ca4a7655a37d 100644
--- a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp
@@ -602,6 +602,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
   Opts.NoDwarfDirectoryAsm = Args.hasArg(OPT_fno_dwarf_directory_asm);
   Opts.SoftFloat = Args.hasArg(OPT_msoft_float);
   Opts.StrictEnums = Args.hasArg(OPT_fstrict_enums);
+  Opts.StrictReturn = !Args.hasArg(OPT_fno_strict_return);
   Opts.StrictVTablePointers = Args.hasArg(OPT_fstrict_vtable_pointers);
   Opts.UnsafeFPMath = Args.hasArg(OPT_menable_unsafe_fp_math) ||
                       Args.hasArg(OPT_cl_unsafe_math_optimizations) ||
diff --git a/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
index 205e15b40b5d..931d44b6965b 100644
--- a/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
+++ b/contrib/llvm/tools/clang/lib/Headers/__clang_cuda_runtime_wrapper.h
@@ -232,6 +232,11 @@ static inline __device__ void __brkpt(int __c) { __brkpt(); }
 // hardware, seems to generate faster machine code because ptxas can more easily
 // reason about our code.

+#if CUDA_VERSION >= 8000
+#include "sm_60_atomic_functions.hpp"
+#include "sm_61_intrinsics.hpp"
+#endif
+
 #undef __MATH_FUNCTIONS_HPP__

 // math_functions.hpp defines ::signbit as a __host__ __device__ function.
This diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp index 061721dfb8da..f9ea8af00f50 100644 --- a/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/ParseOpenMP.cpp @@ -119,7 +119,8 @@ static OpenMPDirectiveKind ParseOpenMPDirectiveKind(Parser &P) { { OMPD_target, OMPD_teams, OMPD_target_teams }, { OMPD_target_teams, OMPD_distribute, OMPD_target_teams_distribute }, { OMPD_target_teams_distribute, OMPD_parallel, OMPD_target_teams_distribute_parallel }, - { OMPD_target_teams_distribute_parallel, OMPD_for, OMPD_target_teams_distribute_parallel_for } + { OMPD_target_teams_distribute_parallel, OMPD_for, OMPD_target_teams_distribute_parallel_for }, + { OMPD_target_teams_distribute_parallel_for, OMPD_simd, OMPD_target_teams_distribute_parallel_for_simd } }; enum { CancellationPoint = 0, DeclareReduction = 1, TargetData = 2 }; auto Tok = P.getCurToken(); @@ -758,6 +759,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( case OMPD_target_teams: case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: Diag(Tok, diag::err_omp_unexpected_directive) << getOpenMPDirectiveName(DKind); break; @@ -796,7 +798,8 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( /// 'teams distribute parallel for simd' | /// 'teams distribute parallel for' | 'target teams' | /// 'target teams distribute' | -/// 'target teams distribute parallel for' {clause} +/// 'target teams distribute parallel for' | +/// 'target teams distribute parallel for simd' {clause} /// annot_pragma_openmp_end /// StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( @@ -912,7 +915,8 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( case OMPD_teams_distribute_parallel_for: case OMPD_target_teams: case OMPD_target_teams_distribute: - case OMPD_target_teams_distribute_parallel_for: { + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: { ConsumeToken(); // Parse directive name of the 'critical' directive if any. if (DKind == OMPD_critical) { diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp index 6f272ec839f5..282633bbc9e1 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp @@ -228,10 +228,8 @@ void Sema::EraseUnwantedCUDAMatches( [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); })); // Erase all functions with lower priority. 
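// llvm::erase_if, used in the rewrite below, is shorthand for the classic
// erase/remove_if dance; the two spellings are equivalent:

#include "llvm/ADT/STLExtras.h"
#include <vector>

void erase_if_example() {
  std::vector<int> V = {1, 2, 3, 4};
  // Before: V.erase(std::remove_if(V.begin(), V.end(), Pred), V.end());
  llvm::erase_if(V, [](int N) { return N % 2 == 0; }); // leaves {1, 3}
}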
- Matches.erase( - llvm::remove_if( - Matches, [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }), - Matches.end()); + llvm::erase_if(Matches, + [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }); } /// When an implicitly-declared special member has to invoke more than one diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp index d76bde574677..3eef366b75b3 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaCodeComplete.cpp @@ -7471,6 +7471,23 @@ void Sema::CodeCompleteObjCMethodDeclSelector(Scope *S, } Results.ExitScope(); + + if (!AtParameterName && !SelIdents.empty() && + SelIdents.front()->getName().startswith("init")) { + for (const auto &M : PP.macros()) { + if (M.first->getName() != "NS_DESIGNATED_INITIALIZER") + continue; + Results.EnterNewScope(); + CodeCompletionBuilder Builder(Results.getAllocator(), + Results.getCodeCompletionTUInfo()); + Builder.AddTypedTextChunk( + Builder.getAllocator().CopyString(M.first->getName())); + Results.AddResult(CodeCompletionResult(Builder.TakeString(), CCP_Macro, + CXCursor_MacroDefinition)); + Results.ExitScope(); + } + } + HandleCodeCompleteResults(this, CodeCompleter, CodeCompletionContext::CCC_Other, Results.data(),Results.size()); diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp index 804aadc0ff77..edceb537df75 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp @@ -1700,7 +1700,8 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { case OMPD_teams_distribute_parallel_for_simd: case OMPD_teams_distribute_parallel_for: case OMPD_target_teams_distribute: - case OMPD_target_teams_distribute_parallel_for: { + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: { QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1); QualType KmpInt32PtrTy = Context.getPointerType(KmpInt32Ty).withConst().withRestrict(); @@ -2439,6 +2440,12 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( AllowedNameModifiers.push_back(OMPD_target); AllowedNameModifiers.push_back(OMPD_parallel); break; + case OMPD_target_teams_distribute_parallel_for_simd: + Res = ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective( + ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); + AllowedNameModifiers.push_back(OMPD_target); + AllowedNameModifiers.push_back(OMPD_parallel); + break; case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_threadprivate: @@ -6375,6 +6382,52 @@ StmtResult Sema::ActOnOpenMPTargetTeamsDistributeParallelForDirective( Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } +StmtResult Sema::ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective( + ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc, + llvm::DenseMap &VarsWithImplicitDSA) { + if (!AStmt) + return StmtError(); + + CapturedStmt *CS = cast(AStmt); + // 1.2.2 OpenMP Language Terminology + // Structured block - An executable statement with a single entry at the + // top and a single exit at the bottom. + // The point of exit cannot be a branch out of the structured block. + // longjmp() and throw() must not violate the entry/exit criteria. 
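// For reference, the structured-block rule quoted above rejects code that
// branches out of the associated statement (illustrative, deliberately
// ill-formed):
//
//   #pragma omp target teams distribute parallel for simd
//   for (int i = 0; i < n; ++i) {
//     if (stop(i))      // 'stop' is a hypothetical predicate
//       goto done;      // error: jumps out of the structured block
//   }
//   done:;
//
// Exceptions and longjmp must likewise not cross the block boundary, which is
// what setNothrow() on the captured declaration records below.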
+ CS->getCapturedDecl()->setNothrow(); + + OMPLoopDirective::HelperExprs B; + // In presence of clause 'collapse' with number of loops, it will + // define the nested loops number. + auto NestedLoopCount = CheckOpenMPLoop( + OMPD_target_teams_distribute_parallel_for_simd, + getCollapseNumberExpr(Clauses), + nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack, + VarsWithImplicitDSA, B); + if (NestedLoopCount == 0) + return StmtError(); + + assert((CurContext->isDependentContext() || B.builtAll()) && + "omp target teams distribute parallel for simd loop exprs were not " + "built"); + + if (!CurContext->isDependentContext()) { + // Finalize the clauses that need pre-built expressions for CodeGen. + for (auto C : Clauses) { + if (auto *LC = dyn_cast(C)) + if (FinishOpenMPLinearClause(*LC, cast(B.IterationVarRef), + B.NumIterations, *this, CurScope, + DSAStack)) + return StmtError(); + } + } + + getCurFunction()->setHasBranchProtectedScope(); + return OMPTargetTeamsDistributeParallelForSimdDirective::Create( + Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); +} + OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, SourceLocation StartLoc, SourceLocation LParenLoc, @@ -7397,7 +7450,8 @@ OMPClause *Sema::ActOnOpenMPPrivateClause(ArrayRef VarList, if (CurrDir == OMPD_target || CurrDir == OMPD_target_parallel || CurrDir == OMPD_target_teams || CurrDir == OMPD_target_teams_distribute || - CurrDir == OMPD_target_teams_distribute_parallel_for) { + CurrDir == OMPD_target_teams_distribute_parallel_for || + CurrDir == OMPD_target_teams_distribute_parallel_for_simd) { OpenMPClauseKind ConflictKind; if (DSAStack->checkMappableExprComponentListsForDecl( VD, /*CurrentRegionOnly=*/true, @@ -7657,7 +7711,8 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, if (CurrDir == OMPD_target || CurrDir == OMPD_target_parallel || CurrDir == OMPD_target_teams || CurrDir == OMPD_target_teams_distribute || - CurrDir == OMPD_target_teams_distribute_parallel_for) { + CurrDir == OMPD_target_teams_distribute_parallel_for || + CurrDir == OMPD_target_teams_distribute_parallel_for_simd) { OpenMPClauseKind ConflictKind; if (DSAStack->checkMappableExprComponentListsForDecl( VD, /*CurrentRegionOnly=*/true, @@ -10175,7 +10230,8 @@ checkMappableExpressionList(Sema &SemaRef, DSAStackTy *DSAS, // attribute clause on the same construct if ((DKind == OMPD_target || DKind == OMPD_target_teams || DKind == OMPD_target_teams_distribute || - DKind == OMPD_target_teams_distribute_parallel_for) && VD) { + DKind == OMPD_target_teams_distribute_parallel_for || + DKind == OMPD_target_teams_distribute_parallel_for_simd) && VD) { auto DVar = DSAS->getTopDSA(VD, false); if (isOpenMPPrivate(DVar.CKind)) { SemaRef.Diag(ELoc, diag::err_omp_variable_in_given_clause_and_dsa) diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp index 47e3df20d911..b5c0e634fa50 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp @@ -8958,9 +8958,7 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc, S.IdentifyCUDAPreference(Caller, Cand->Function) == Sema::CFP_WrongSide; }; - Candidates.erase(std::remove_if(Candidates.begin(), Candidates.end(), - IsWrongSideCandidate), - Candidates.end()); + llvm::erase_if(Candidates, IsWrongSideCandidate); } } diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp 
b/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp index facc5d1b375b..66a10ef7993e 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp @@ -4244,7 +4244,7 @@ namespace { UnnamedLocalNoLinkageFinder(Sema &S, SourceRange SR) : S(S), SR(SR) { } bool Visit(QualType T) { - return inherited::Visit(T.getTypePtr()); + return T.isNull() ? false : inherited::Visit(T.getTypePtr()); } #define TYPE(Class, Parent) \ @@ -4497,17 +4497,7 @@ bool Sema::CheckTemplateArgument(TemplateTypeParmDecl *Param, // // C++11 allows these, and even in C++03 we allow them as an extension with // a warning. - bool NeedsCheck; - if (LangOpts.CPlusPlus11) - NeedsCheck = - !Diags.isIgnored(diag::warn_cxx98_compat_template_arg_unnamed_type, - SR.getBegin()) || - !Diags.isIgnored(diag::warn_cxx98_compat_template_arg_local_type, - SR.getBegin()); - else - NeedsCheck = Arg->hasUnnamedOrLocalType(); - - if (NeedsCheck) { + if (LangOpts.CPlusPlus11 || Arg->hasUnnamedOrLocalType()) { UnnamedLocalNoLinkageFinder Finder(*this, SR); (void)Finder.Visit(Context.getCanonicalType(Arg)); } diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp index 0bc85a2f2635..7f1fd91c46f0 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -288,19 +288,22 @@ checkDeducedTemplateArguments(ASTContext &Context, X.pack_size() != Y.pack_size()) return DeducedTemplateArgument(); + llvm::SmallVector NewPack; for (TemplateArgument::pack_iterator XA = X.pack_begin(), XAEnd = X.pack_end(), YA = Y.pack_begin(); XA != XAEnd; ++XA, ++YA) { - // FIXME: Do we need to merge the results together here? - if (checkDeducedTemplateArguments(Context, - DeducedTemplateArgument(*XA, X.wasDeducedFromArrayBound()), - DeducedTemplateArgument(*YA, Y.wasDeducedFromArrayBound())) - .isNull()) + TemplateArgument Merged = checkDeducedTemplateArguments( + Context, DeducedTemplateArgument(*XA, X.wasDeducedFromArrayBound()), + DeducedTemplateArgument(*YA, Y.wasDeducedFromArrayBound())); + if (Merged.isNull()) return DeducedTemplateArgument(); + NewPack.push_back(Merged); } - return X; + return DeducedTemplateArgument( + TemplateArgument::CreatePackCopy(Context, NewPack), + X.wasDeducedFromArrayBound() && Y.wasDeducedFromArrayBound()); } llvm_unreachable("Invalid TemplateArgument Kind!"); @@ -672,17 +675,20 @@ public: // for that pack, then clear out the deduced argument. for (auto &Pack : Packs) { DeducedTemplateArgument &DeducedArg = Deduced[Pack.Index]; - if (!DeducedArg.isNull()) { + if (!Pack.New.empty() || !DeducedArg.isNull()) { + while (Pack.New.size() < PackElements) + Pack.New.push_back(DeducedTemplateArgument()); Pack.New.push_back(DeducedArg); DeducedArg = DeducedTemplateArgument(); } } + ++PackElements; } /// \brief Finish template argument deduction for a set of argument packs, /// producing the argument packs and checking for consistency with prior /// deductions. - Sema::TemplateDeductionResult finish(bool HasAnyArguments) { + Sema::TemplateDeductionResult finish() { // Build argument packs for each of the parameter packs expanded by this // pack expansion. for (auto &Pack : Packs) { @@ -691,7 +697,7 @@ public: // Build or find a new value for this pack. 
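// Why the element-wise merge in checkDeducedTemplateArguments (above) matters:
// the same pack may be deduced from more than one position, each occurrence
// contributing partial information. A hypothetical signature where that
// happens:
//
//   template <typename... T> void g(std::tuple<T...> t, T... rest);
//
// Here T is deduced both from the tuple argument and from the trailing
// arguments, and the two deductions are now combined element by element (also
// merging the deduced-from-array-bound flags) instead of returning the first
// pack wholesale, which is what the removed FIXME asked for.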
DeducedTemplateArgument NewPack; - if (HasAnyArguments && Pack.New.empty()) { + if (PackElements && Pack.New.empty()) { if (Pack.DeferredDeduction.isNull()) { // We were not able to deduce anything for this parameter pack // (because it only appeared in non-deduced contexts), so just @@ -758,6 +764,7 @@ private: TemplateParameterList *TemplateParams; SmallVectorImpl &Deduced; TemplateDeductionInfo &Info; + unsigned PackElements = 0; SmallVector Packs; }; @@ -861,10 +868,7 @@ DeduceTemplateArguments(Sema &S, QualType Pattern = Expansion->getPattern(); PackDeductionScope PackScope(S, TemplateParams, Deduced, Info, Pattern); - bool HasAnyArguments = false; for (; ArgIdx < NumArgs; ++ArgIdx) { - HasAnyArguments = true; - // Deduce template arguments from the pattern. if (Sema::TemplateDeductionResult Result = DeduceTemplateArgumentsByTypeMatch(S, TemplateParams, Pattern, @@ -877,7 +881,7 @@ DeduceTemplateArguments(Sema &S, // Build argument packs for each of the parameter packs expanded by this // pack expansion. - if (auto Result = PackScope.finish(HasAnyArguments)) + if (auto Result = PackScope.finish()) return Result; } @@ -1935,10 +1939,7 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams, // Keep track of the deduced template arguments for each parameter pack // expanded by this pack expansion (the outer index) and for each // template argument (the inner SmallVectors). - bool HasAnyArguments = false; for (; hasTemplateArgumentForDeduction(Args, ArgIdx); ++ArgIdx) { - HasAnyArguments = true; - // Deduce template arguments from the pattern. if (Sema::TemplateDeductionResult Result = DeduceTemplateArguments(S, TemplateParams, Pattern, Args[ArgIdx], @@ -1950,7 +1951,7 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams, // Build argument packs for each of the parameter packs expanded by this // pack expansion. - if (auto Result = PackScope.finish(HasAnyArguments)) + if (auto Result = PackScope.finish()) return Result; } @@ -2145,6 +2146,16 @@ ConvertDeducedTemplateArgument(Sema &S, NamedDecl *Param, InnerArg.setDeducedFromArrayBound(Arg.wasDeducedFromArrayBound()); assert(InnerArg.getKind() != TemplateArgument::Pack && "deduced nested pack"); + if (P.isNull()) { + // We deduced arguments for some elements of this pack, but not for + // all of them. This happens if we get a conditionally-non-deduced + // context in a pack expansion (such as an overload set in one of the + // arguments). + S.Diag(Param->getLocation(), + diag::err_template_arg_deduced_incomplete_pack) + << Arg << Param; + return true; + } if (ConvertArg(InnerArg, PackedArgsBuilder.size())) return true; @@ -3192,67 +3203,59 @@ static Sema::TemplateDeductionResult DeduceTemplateArgumentByListElement( /// \brief Attempt template argument deduction from an initializer list /// deemed to be an argument in a function call. 
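// The language rule implemented below, seen from user code: a braced list can
// drive deduction when the parameter is std::initializer_list<P0> or an array
// P0[N] with N a non-type template parameter. An illustrative translation
// unit (hypothetical names):

#include <initializer_list>

template <typename T> void takes_il(std::initializer_list<T>) {}
template <typename T, int N> void takes_array(T (&&)[N]) {}

void cwg1591_example() {
  takes_il({1, 2, 3});    // deduces T = int from the elements
  takes_array({1, 2, 3}); // deduces T = int and N = 3 (N from the list length)
}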
-static bool +static Sema::TemplateDeductionResult DeduceFromInitializerList(Sema &S, TemplateParameterList *TemplateParams, QualType AdjustedParamType, InitListExpr *ILE, TemplateDeductionInfo &Info, SmallVectorImpl &Deduced, - unsigned TDF, Sema::TemplateDeductionResult &Result) { - - // [temp.deduct.call] p1 (post CWG-1591) - // If removing references and cv-qualifiers from P gives - // std::initializer_list or P0[N] for some P0 and N and the argument is a - // non-empty initializer list (8.5.4), then deduction is performed instead for - // each element of the initializer list, taking P0 as a function template - // parameter type and the initializer element as its argument, and in the - // P0[N] case, if N is a non-type template parameter, N is deduced from the - // length of the initializer list. Otherwise, an initializer list argument - // causes the parameter to be considered a non-deduced context - - const bool IsConstSizedArray = AdjustedParamType->isConstantArrayType(); - - const bool IsDependentSizedArray = - !IsConstSizedArray && AdjustedParamType->isDependentSizedArrayType(); - - QualType ElTy; // The element type of the std::initializer_list or the array. - - const bool IsSTDList = !IsConstSizedArray && !IsDependentSizedArray && - S.isStdInitializerList(AdjustedParamType, &ElTy); - - if (!IsConstSizedArray && !IsDependentSizedArray && !IsSTDList) - return false; - - Result = Sema::TDK_Success; - // If we are not deducing against the 'T' in a std::initializer_list then - // deduce against the 'T' in T[N]. - if (ElTy.isNull()) { - assert(!IsSTDList); - ElTy = S.Context.getAsArrayType(AdjustedParamType)->getElementType(); + unsigned TDF) { + // C++ [temp.deduct.call]p1: (CWG 1591) + // If removing references and cv-qualifiers from P gives + // std::initializer_list or P0[N] for some P0 and N and the argument is + // a non-empty initializer list, then deduction is performed instead for + // each element of the initializer list, taking P0 as a function template + // parameter type and the initializer element as its argument + // + // FIXME: Remove references and cv-qualifiers here? Consider + // std::initializer_list&&> + QualType ElTy; + auto *ArrTy = S.Context.getAsArrayType(AdjustedParamType); + if (ArrTy) + ElTy = ArrTy->getElementType(); + else if (!S.isStdInitializerList(AdjustedParamType, &ElTy)) { + // Otherwise, an initializer list argument causes the parameter to be + // considered a non-deduced context + return Sema::TDK_Success; } + // Deduction only needs to be done for dependent types. if (ElTy->isDependentType()) { for (Expr *E : ILE->inits()) { - if ((Result = DeduceTemplateArgumentByListElement(S, TemplateParams, ElTy, - E, Info, Deduced, TDF))) - return true; + if (auto Result = DeduceTemplateArgumentByListElement( + S, TemplateParams, ElTy, E, Info, Deduced, TDF)) + return Result; } } - if (IsDependentSizedArray) { - const DependentSizedArrayType *ArrTy = - S.Context.getAsDependentSizedArrayType(AdjustedParamType); + + // in the P0[N] case, if N is a non-type template parameter, N is deduced + // from the length of the initializer list. + // FIXME: We're not supposed to get here if N would be deduced as 0. + if (auto *DependentArrTy = dyn_cast_or_null(ArrTy)) { // Determine the array bound is something we can deduce. 
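A sketch of the cases the rewritten DeduceFromInitializerList distinguishes, per CWG 1591 (example code, not from the patch):

#include <initializer_list>

template <typename T> void f(std::initializer_list<T>) {} // deduce T
template <typename T, int N> void g(T (&&)[N]) {}         // deduce T and N

void test() {
  f({1, 2, 3}); // T = int
  g({1, 2, 3}); // T = int, N = 3, deduced from the length of the list
}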
if (NonTypeTemplateParmDecl *NTTP = - getDeducedParameterFromExpr(Info, ArrTy->getSizeExpr())) { + getDeducedParameterFromExpr(Info, DependentArrTy->getSizeExpr())) { // We can perform template argument deduction for the given non-type // template parameter. llvm::APInt Size(S.Context.getIntWidth(NTTP->getType()), ILE->getNumInits()); - Result = DeduceNonTypeTemplateArgument( - S, TemplateParams, NTTP, llvm::APSInt(Size), NTTP->getType(), - /*ArrayBound=*/true, Info, Deduced); + if (auto Result = DeduceNonTypeTemplateArgument( + S, TemplateParams, NTTP, llvm::APSInt(Size), NTTP->getType(), + /*ArrayBound=*/true, Info, Deduced)) + return Result; } } - return true; + + return Sema::TDK_Success; } /// \brief Perform template argument deduction by matching a parameter type @@ -3268,15 +3271,10 @@ DeduceTemplateArgumentByListElement(Sema &S, unsigned TDF) { // Handle the case where an init list contains another init list as the // element. - if (InitListExpr *ILE = dyn_cast(Arg)) { - Sema::TemplateDeductionResult Result; - if (!DeduceFromInitializerList(S, TemplateParams, - ParamType.getNonReferenceType(), ILE, Info, - Deduced, TDF, Result)) - return Sema::TDK_Success; // Just ignore this expression. - - return Result; - } + if (InitListExpr *ILE = dyn_cast(Arg)) + return DeduceFromInitializerList(S, TemplateParams, + ParamType.getNonReferenceType(), ILE, Info, + Deduced, TDF); // For all other cases, just match by type. QualType ArgType = Arg->getType(); @@ -3363,58 +3361,51 @@ Sema::TemplateDeductionResult Sema::DeduceTemplateArguments( ParamTypes.push_back(Function->getParamDecl(I)->getType()); } + SmallVector OriginalCallArgs; + + // Deduce an argument of type ParamType from an expression with index ArgIdx. + auto DeduceCallArgument = [&](QualType ParamType, unsigned ArgIdx) { + Expr *Arg = Args[ArgIdx]; + QualType ArgType = Arg->getType(); + QualType OrigParamType = ParamType; + + unsigned TDF = 0; + if (AdjustFunctionParmAndArgTypesForDeduction(*this, TemplateParams, + ParamType, ArgType, Arg, + TDF)) + return Sema::TDK_Success; + + // If we have nothing to deduce, we're done. + if (!hasDeducibleTemplateParameters(*this, FunctionTemplate, ParamType)) + return Sema::TDK_Success; + + // If the argument is an initializer list ... + if (InitListExpr *ILE = dyn_cast(Arg)) + return DeduceFromInitializerList(*this, TemplateParams, ParamType, ILE, + Info, Deduced, TDF); + + // Keep track of the argument type and corresponding parameter index, + // so we can check for compatibility between the deduced A and A. + OriginalCallArgs.push_back(OriginalCallArg(OrigParamType, ArgIdx, ArgType)); + + return DeduceTemplateArgumentsByTypeMatch(*this, TemplateParams, ParamType, + ArgType, Info, Deduced, TDF); + }; + // Deduce template arguments from the function parameters. Deduced.resize(TemplateParams->size()); - unsigned ArgIdx = 0; - SmallVector OriginalCallArgs; - for (unsigned ParamIdx = 0, NumParamTypes = ParamTypes.size(); + for (unsigned ParamIdx = 0, NumParamTypes = ParamTypes.size(), ArgIdx = 0; ParamIdx != NumParamTypes; ++ParamIdx) { - QualType OrigParamType = ParamTypes[ParamIdx]; - QualType ParamType = OrigParamType; + QualType ParamType = ParamTypes[ParamIdx]; - const PackExpansionType *ParamExpansion - = dyn_cast(ParamType); + const PackExpansionType *ParamExpansion = + dyn_cast(ParamType); if (!ParamExpansion) { // Simple case: matching a function parameter to a function argument. 
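The DeduceCallArgument lambda above folds two previously duplicated loop bodies into one: the plain-parameter branch and the pack-expansion branch below both deduce through it. An illustrative call, with invented names:

template <typename T, typename... U>
void h(T first, U... rest) {}

void test() {
  h(1, 2.0, "x"); // T = int via the non-pack branch; U = <double, const char *>
                  // via the pack branch, one element per remaining argument
}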
if (ArgIdx >= CheckArgs) break; - Expr *Arg = Args[ArgIdx++]; - QualType ArgType = Arg->getType(); - - unsigned TDF = 0; - if (AdjustFunctionParmAndArgTypesForDeduction(*this, TemplateParams, - ParamType, ArgType, Arg, - TDF)) - continue; - - // If we have nothing to deduce, we're done. - if (!hasDeducibleTemplateParameters(*this, FunctionTemplate, ParamType)) - continue; - - // If the argument is an initializer list ... - if (InitListExpr *ILE = dyn_cast(Arg)) { - TemplateDeductionResult Result; - // Removing references was already done. - if (!DeduceFromInitializerList(*this, TemplateParams, ParamType, ILE, - Info, Deduced, TDF, Result)) - continue; - - if (Result) - return Result; - // Don't track the argument type, since an initializer list has none. - continue; - } - - // Keep track of the argument type and corresponding parameter index, - // so we can check for compatibility between the deduced A and A. - OriginalCallArgs.push_back(OriginalCallArg(OrigParamType, ArgIdx-1, - ArgType)); - - if (TemplateDeductionResult Result - = DeduceTemplateArgumentsByTypeMatch(*this, TemplateParams, - ParamType, ArgType, - Info, Deduced, TDF)) + if (auto Result = DeduceCallArgument(ParamType, ArgIdx++)) return Result; continue; @@ -3429,6 +3420,9 @@ Sema::TemplateDeductionResult Sema::DeduceTemplateArguments( // the function parameter pack. For a function parameter pack that does // not occur at the end of the parameter-declaration-list, the type of // the parameter pack is a non-deduced context. + // FIXME: This does not say that subsequent parameters are also non-deduced. + // See also DR1388 / DR1399, which effectively says we should keep deducing + // after the pack. if (ParamIdx + 1 < NumParamTypes) break; @@ -3436,57 +3430,13 @@ Sema::TemplateDeductionResult Sema::DeduceTemplateArguments( PackDeductionScope PackScope(*this, TemplateParams, Deduced, Info, ParamPattern); - bool HasAnyArguments = false; - for (; ArgIdx < Args.size(); ++ArgIdx) { - HasAnyArguments = true; - - QualType OrigParamType = ParamPattern; - ParamType = OrigParamType; - Expr *Arg = Args[ArgIdx]; - QualType ArgType = Arg->getType(); - - unsigned TDF = 0; - if (AdjustFunctionParmAndArgTypesForDeduction(*this, TemplateParams, - ParamType, ArgType, Arg, - TDF)) { - // We can't actually perform any deduction for this argument, so stop - // deduction at this point. - ++ArgIdx; - break; - } - - // As above, initializer lists need special handling. - if (InitListExpr *ILE = dyn_cast(Arg)) { - TemplateDeductionResult Result; - if (!DeduceFromInitializerList(*this, TemplateParams, ParamType, ILE, - Info, Deduced, TDF, Result)) { - ++ArgIdx; - break; - } - - if (Result) - return Result; - } else { - - // Keep track of the argument type and corresponding argument index, - // so we can check for compatibility between the deduced A and A. - if (hasDeducibleTemplateParameters(*this, FunctionTemplate, ParamType)) - OriginalCallArgs.push_back(OriginalCallArg(OrigParamType, ArgIdx, - ArgType)); - - if (TemplateDeductionResult Result - = DeduceTemplateArgumentsByTypeMatch(*this, TemplateParams, - ParamType, ArgType, Info, - Deduced, TDF)) - return Result; - } - - PackScope.nextPackElement(); - } + for (; ArgIdx < Args.size(); PackScope.nextPackElement(), ++ArgIdx) + if (auto Result = DeduceCallArgument(ParamPattern, ArgIdx)) + return Result; // Build argument packs for each of the parameter packs expanded by this // pack expansion. 
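On the DR1388/DR1399 FIXME above: a pack that is not the last parameter is currently a non-deduced context, and deduction stops there instead of continuing with the trailing parameters. A hedged illustration:

template <typename... T>
void k(T... ts, int last) {}

void test() {
  k(1);       // OK: T is deduced as the empty pack and 1 binds to `last`
  // k(1, 2); // error: T cannot be deduced here, so 2 has no parameter to match
}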
- if (auto Result = PackScope.finish(HasAnyArguments)) + if (auto Result = PackScope.finish()) return Result; // After we've matching against a parameter pack, we're done. diff --git a/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h b/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h index 3ab6019f0ec3..66892936e573 100644 --- a/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h +++ b/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h @@ -7766,6 +7766,20 @@ TreeTransform::TransformOMPTargetTeamsDistributeParallelForDirective( return Res; } +template +StmtResult TreeTransform:: + TransformOMPTargetTeamsDistributeParallelForSimdDirective( + OMPTargetTeamsDistributeParallelForSimdDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().StartOpenMPDSABlock( + OMPD_target_teams_distribute_parallel_for_simd, DirName, nullptr, + D->getLocStart()); + auto Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().EndOpenMPDSABlock(Res.get()); + return Res; +} + + //===----------------------------------------------------------------------===// // OpenMP clause transformation //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp index 5607f764a9c3..19fac55664ae 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2860,6 +2860,11 @@ void ASTStmtReader::VisitOMPTargetTeamsDistributeParallelForDirective( VisitOMPLoopDirective(D); } +void ASTStmtReader::VisitOMPTargetTeamsDistributeParallelForSimdDirective( + OMPTargetTeamsDistributeParallelForSimdDirective *D) { + VisitOMPLoopDirective(D); +} + //===----------------------------------------------------------------------===// // ASTReader Implementation //===----------------------------------------------------------------------===// @@ -3638,6 +3643,14 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; } + case STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_DIRECTIVE: { + auto NumClauses = Record[ASTStmtReader::NumStmtFields]; + auto CollapsedNum = Record[ASTStmtReader::NumStmtFields + 1]; + S = OMPTargetTeamsDistributeParallelForSimdDirective::CreateEmpty( + Context, NumClauses, CollapsedNum, Empty); + break; + } + case EXPR_CXX_OPERATOR_CALL: S = new (Context) CXXOperatorCallExpr(Context, Empty); break; diff --git a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp index 3993be146edf..162b2bd25260 100644 --- a/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/contrib/llvm/tools/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2556,6 +2556,13 @@ void ASTStmtWriter::VisitOMPTargetTeamsDistributeParallelForDirective( Code = serialization::STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE; } +void ASTStmtWriter::VisitOMPTargetTeamsDistributeParallelForSimdDirective( + OMPTargetTeamsDistributeParallelForSimdDirective *D) { + VisitOMPLoopDirective(D); + Code = serialization:: + STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_DIRECTIVE; +} + //===----------------------------------------------------------------------===// // ASTWriter Implementation //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp 
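For reference, the combined OpenMP 4.5 construct whose AST node the transform and (de)serialization hooks above handle looks like this in user code (a minimal sketch):

void saxpy(int n, float a, float *x, float *y) {
#pragma omp target teams distribute parallel for simd
  for (int i = 0; i < n; ++i)
    y[i] = a * x[i] + y[i];
}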
b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 5b2119aeda27..707168b4de0a 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -869,6 +869,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPTargetTeamsDirectiveClass: case Stmt::OMPTargetTeamsDistributeDirectiveClass: case Stmt::OMPTargetTeamsDistributeParallelForDirectiveClass: + case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass: llvm_unreachable("Stmt should not be in analyzer evaluation loop"); case Stmt::ObjCSubscriptRefExprClass: diff --git a/contrib/llvm/tools/clang/lib/Tooling/Core/Diagnostic.cpp b/contrib/llvm/tools/clang/lib/Tooling/Core/Diagnostic.cpp new file mode 100644 index 000000000000..3bbc2b901e38 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Tooling/Core/Diagnostic.cpp @@ -0,0 +1,46 @@ +//===--- Diagnostic.cpp - Framework for clang diagnostics tools ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements classes to support/store diagnostics refactoring. +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/Core/Diagnostic.h" +#include "clang/Basic/SourceManager.h" + +namespace clang { +namespace tooling { + +DiagnosticMessage::DiagnosticMessage(llvm::StringRef Message) + : Message(Message), FileOffset(0) {} + +DiagnosticMessage::DiagnosticMessage(llvm::StringRef Message, + const SourceManager &Sources, + SourceLocation Loc) + : Message(Message) { + assert(Loc.isValid() && Loc.isFileID()); + FilePath = Sources.getFilename(Loc); + FileOffset = Sources.getFileOffset(Loc); +} + +Diagnostic::Diagnostic(llvm::StringRef DiagnosticName, + Diagnostic::Level DiagLevel, StringRef BuildDirectory) + : DiagnosticName(DiagnosticName), DiagLevel(DiagLevel), + BuildDirectory(BuildDirectory) {} + +Diagnostic::Diagnostic(llvm::StringRef DiagnosticName, + DiagnosticMessage &Message, + llvm::StringMap &Fix, + SmallVector &Notes, + Level DiagLevel, llvm::StringRef BuildDirectory) + : DiagnosticName(DiagnosticName), Message(Message), Fix(Fix), Notes(Notes), + DiagLevel(DiagLevel), BuildDirectory(BuildDirectory) {} + +} // end namespace tooling +} // end namespace clang diff --git a/contrib/llvm/tools/lld/COFF/PDB.cpp b/contrib/llvm/tools/lld/COFF/PDB.cpp index 56d5a3651143..d5c52a69be69 100644 --- a/contrib/llvm/tools/lld/COFF/PDB.cpp +++ b/contrib/llvm/tools/lld/COFF/PDB.cpp @@ -13,6 +13,7 @@ #include "Error.h" #include "SymbolTable.h" #include "Symbols.h" +#include "llvm/DebugInfo/CodeView/CVDebugRecord.h" #include "llvm/DebugInfo/CodeView/SymbolDumper.h" #include "llvm/DebugInfo/CodeView/TypeDumper.h" #include "llvm/DebugInfo/MSF/ByteStream.h" @@ -131,7 +132,8 @@ static void addTypeInfo(SymbolTable *Symtab, // Creates a PDB file. void coff::createPDB(StringRef Path, SymbolTable *Symtab, - ArrayRef SectionTable) { + ArrayRef SectionTable, + const llvm::codeview::DebugInfo *DI) { if (Config->DumpPdb) dumpCodeView(Symtab); @@ -146,11 +148,9 @@ void coff::createPDB(StringRef Path, SymbolTable *Symtab, // Add an Info stream. auto &InfoBuilder = Builder.getInfoBuilder(); - InfoBuilder.setAge(1); - - // Should be a random number, 0 for now. 
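A hypothetical use of the Diagnostic classes added in Tooling/Core/Diagnostic.cpp above. The check name, build directory, and the SM/Loc parameters are invented for the sketch, and the Notes element count follows the header's SmallVector type:

#include "clang/Basic/SourceManager.h"
#include "clang/Tooling/Core/Diagnostic.h"

clang::tooling::Diagnostic
makeDiag(const clang::SourceManager &SM, clang::SourceLocation Loc) {
  using namespace clang::tooling;
  DiagnosticMessage Msg("unused variable 'x'", SM, Loc); // Loc must be a valid file location
  llvm::StringMap<Replacements> NoFix;                   // no fix-its attached
  llvm::SmallVector<DiagnosticMessage, 1> NoNotes;       // no attached notes
  return Diagnostic("my-check", Msg, NoFix, NoNotes, Diagnostic::Warning,
                    "/path/to/build");
}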
- InfoBuilder.setGuid({}); - + InfoBuilder.setAge(DI->PDB70.Age); + InfoBuilder.setGuid( + *reinterpret_cast(&DI->PDB70.Signature)); // Should be the current time, but set 0 for reproducibilty. InfoBuilder.setSignature(0); InfoBuilder.setVersion(pdb::PdbRaw_ImplVer::PdbImplVC70); diff --git a/contrib/llvm/tools/lld/COFF/PDB.h b/contrib/llvm/tools/lld/COFF/PDB.h index 091e90fa1ef1..c9c37914299a 100644 --- a/contrib/llvm/tools/lld/COFF/PDB.h +++ b/contrib/llvm/tools/lld/COFF/PDB.h @@ -13,12 +13,19 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +namespace llvm { +namespace codeview { +union DebugInfo; +} +} + namespace lld { namespace coff { class SymbolTable; void createPDB(llvm::StringRef Path, SymbolTable *Symtab, - llvm::ArrayRef SectionTable); + llvm::ArrayRef SectionTable, + const llvm::codeview::DebugInfo *DI); } } diff --git a/contrib/llvm/tools/lld/COFF/Writer.cpp b/contrib/llvm/tools/lld/COFF/Writer.cpp index 3e69aebbb424..71217ebeb60a 100644 --- a/contrib/llvm/tools/lld/COFF/Writer.cpp +++ b/contrib/llvm/tools/lld/COFF/Writer.cpp @@ -304,7 +304,7 @@ void Writer::run() { writeBuildId(); if (!Config->PDBPath.empty()) - createPDB(Config->PDBPath, Symtab, SectionTable); + createPDB(Config->PDBPath, Symtab, SectionTable, BuildId->DI); if (auto EC = Buffer->commit()) fatal(EC, "failed to write the output file"); diff --git a/contrib/llvm/tools/lld/ELF/InputFiles.cpp b/contrib/llvm/tools/lld/ELF/InputFiles.cpp index 2a8659921463..f4128c5096cb 100644 --- a/contrib/llvm/tools/lld/ELF/InputFiles.cpp +++ b/contrib/llvm/tools/lld/ELF/InputFiles.cpp @@ -461,7 +461,7 @@ SymbolBody *elf::ObjectFile::createSymbolBody(const Elf_Sym *Sym) { StringRefZ Name = this->StringTable.data() + Sym->st_name; if (Sym->st_shndx == SHN_UNDEF) return new (BAlloc) - Undefined(Name, /*IsLocal=*/true, StOther, Type, this); + Undefined(Name, /*IsLocal=*/true, StOther, Type, this); return new (BAlloc) DefinedRegular(Name, /*IsLocal=*/true, StOther, Type, Value, Size, Sec, this); diff --git a/contrib/llvm/tools/lld/ELF/LTO.cpp b/contrib/llvm/tools/lld/ELF/LTO.cpp index a3d6a141a202..b342b6195f1d 100644 --- a/contrib/llvm/tools/lld/ELF/LTO.cpp +++ b/contrib/llvm/tools/lld/ELF/LTO.cpp @@ -96,12 +96,12 @@ BitcodeCompiler::BitcodeCompiler() : LTOObj(createLTO()) {} BitcodeCompiler::~BitcodeCompiler() = default; -static void undefine(Symbol *S) { - replaceBody(S, S->body()->getName(), /*IsLocal=*/false, - STV_DEFAULT, S->body()->Type, nullptr); +template static void undefine(Symbol *S) { + replaceBody>(S, S->body()->getName(), /*IsLocal=*/false, + STV_DEFAULT, S->body()->Type, nullptr); } -void BitcodeCompiler::add(BitcodeFile &F) { +template void BitcodeCompiler::add(BitcodeFile &F) { lto::InputFile &Obj = *F.Obj; unsigned SymNum = 0; std::vector Syms = F.getSymbols(); @@ -126,7 +126,7 @@ void BitcodeCompiler::add(BitcodeFile &F) { R.VisibleToRegularObj = Sym->IsUsedInRegularObj || (R.Prevailing && Sym->includeInDynsym()); if (R.Prevailing) - undefine(Sym); + undefine(Sym); } checkError(LTOObj->add(std::move(F.Obj), Resols)); } @@ -157,3 +157,8 @@ std::vector BitcodeCompiler::compile() { } return Ret; } + +template void BitcodeCompiler::template add(BitcodeFile &); +template void BitcodeCompiler::template add(BitcodeFile &); +template void BitcodeCompiler::template add(BitcodeFile &); +template void BitcodeCompiler::template add(BitcodeFile &); diff --git a/contrib/llvm/tools/lld/ELF/LTO.h b/contrib/llvm/tools/lld/ELF/LTO.h index b3d734f2d381..3cb763650e1c 100644 --- a/contrib/llvm/tools/lld/ELF/LTO.h 
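The LTO change above moves add() and undefine() behind a template parameter while keeping their definitions in the .cpp file, relying on explicit instantiation for the four ELF variants. The pattern in miniature (toy names, not lld's API):

struct BitcodeFileStub {}; // stand-in for lld's BitcodeFile

struct Compiler {
  template <class ELFT> void add(BitcodeFileStub &F); // declared in the header
};

// The definition stays in the .cpp file...
template <class ELFT> void Compiler::add(BitcodeFileStub &F) {}

struct ELF32LE {};
struct ELF64LE {};

// ...so every instantiation that other files may use is spelled out here.
template void Compiler::add<ELF32LE>(BitcodeFileStub &);
template void Compiler::add<ELF64LE>(BitcodeFileStub &);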
+++ b/contrib/llvm/tools/lld/ELF/LTO.h @@ -43,7 +43,7 @@ public: BitcodeCompiler(); ~BitcodeCompiler(); - void add(BitcodeFile &F); + template void add(BitcodeFile &F); std::vector compile(); private: diff --git a/contrib/llvm/tools/lld/ELF/SymbolTable.cpp b/contrib/llvm/tools/lld/ELF/SymbolTable.cpp index 79097e176e68..f08fa6229c1a 100644 --- a/contrib/llvm/tools/lld/ELF/SymbolTable.cpp +++ b/contrib/llvm/tools/lld/ELF/SymbolTable.cpp @@ -115,7 +115,7 @@ template void SymbolTable::addCombinedLTOObject() { // Compile bitcode files and replace bitcode symbols. LTO.reset(new BitcodeCompiler); for (BitcodeFile *F : BitcodeFiles) - LTO->add(*F); + LTO->add(*F); for (InputFile *File : LTO->compile()) { ObjectFile *Obj = cast>(File); @@ -256,7 +256,7 @@ Symbol *SymbolTable::addUndefined(StringRef Name, bool IsLocal, insert(Name, Type, getVisibility(StOther), CanOmitFromDynSym, File); if (WasInserted) { S->Binding = Binding; - replaceBody(S, Name, IsLocal, StOther, Type, File); + replaceBody>(S, Name, IsLocal, StOther, Type, File); return S; } if (Binding != STB_WEAK) { @@ -432,7 +432,7 @@ void SymbolTable::addShared(SharedFile *F, StringRef Name, if (S->VersionId == VER_NDX_LOCAL) S->VersionId = VER_NDX_GLOBAL; } - if (WasInserted || isa(S->body())) { + if (WasInserted || isa>(S->body())) { replaceBody>(S, F, Name, Sym, Verdef); if (!S->isWeak()) F->IsUsed = true; diff --git a/contrib/llvm/tools/lld/ELF/Symbols.cpp b/contrib/llvm/tools/lld/ELF/Symbols.cpp index a2133f411c20..f168d37bdf0a 100644 --- a/contrib/llvm/tools/lld/ELF/Symbols.cpp +++ b/contrib/llvm/tools/lld/ELF/Symbols.cpp @@ -173,6 +173,8 @@ template typename ELFT::uint SymbolBody::getThunkVA() const { return DR->ThunkData->getVA(); if (const auto *S = dyn_cast>(this)) return S->ThunkData->getVA(); + if (const auto *S = dyn_cast>(this)) + return S->ThunkData->getVA(); fatal("getThunkVA() not supported for Symbol class\n"); } @@ -232,8 +234,9 @@ template bool DefinedRegular::isMipsPIC() const { (Section->getFile()->getObj().getHeader()->e_flags & EF_MIPS_PIC); } -Undefined::Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, - uint8_t Type, InputFile *File) +template +Undefined::Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, + uint8_t Type, InputFile *File) : SymbolBody(SymbolBody::UndefinedKind, Name, IsLocal, StOther, Type) { this->File = File; } @@ -354,6 +357,11 @@ template uint32_t SymbolBody::template getSize() const; template uint64_t SymbolBody::template getSize() const; template uint64_t SymbolBody::template getSize() const; +template class elf::Undefined; +template class elf::Undefined; +template class elf::Undefined; +template class elf::Undefined; + template class elf::DefinedRegular; template class elf::DefinedRegular; template class elf::DefinedRegular; diff --git a/contrib/llvm/tools/lld/ELF/Symbols.h b/contrib/llvm/tools/lld/ELF/Symbols.h index c95241a5293e..cbf8fa81a138 100644 --- a/contrib/llvm/tools/lld/ELF/Symbols.h +++ b/contrib/llvm/tools/lld/ELF/Symbols.h @@ -236,7 +236,7 @@ public: const OutputSectionBase *Section; }; -class Undefined : public SymbolBody { +template class Undefined : public SymbolBody { public: Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type, InputFile *F); @@ -245,6 +245,12 @@ public: return S->kind() == UndefinedKind; } + // If non-null the symbol has a Thunk that may be used as an alternative + // destination for callers of this Symbol. 
When linking a DSO undefined + // symbols are implicitly imported, the symbol lookup will be performed by + // the dynamic loader. A call to an undefined symbol will be given a PLT + // entry and on ARM this may need a Thunk if the caller is in Thumb state. + Thunk *ThunkData = nullptr; InputFile *file() { return this->File; } }; @@ -416,7 +422,8 @@ struct Symbol { // ELFT, and we verify this with the static_asserts in replaceBody. llvm::AlignedCharArrayUnion< DefinedCommon, DefinedRegular, DefinedSynthetic, - Undefined, SharedSymbol, LazyArchive, LazyObject> + Undefined, SharedSymbol, + LazyArchive, LazyObject> Body; SymbolBody *body() { return reinterpret_cast(Body.buffer); } diff --git a/contrib/llvm/tools/lld/ELF/Target.cpp b/contrib/llvm/tools/lld/ELF/Target.cpp index edae7c65c1b4..d82e654b9c4c 100644 --- a/contrib/llvm/tools/lld/ELF/Target.cpp +++ b/contrib/llvm/tools/lld/ELF/Target.cpp @@ -1730,8 +1730,11 @@ void ARMTargetInfo::writePlt(uint8_t *Buf, uint64_t GotEntryAddr, RelExpr ARMTargetInfo::getThunkExpr(RelExpr Expr, uint32_t RelocType, const InputFile &File, const SymbolBody &S) const { - // If S is an undefined weak symbol we don't need a Thunk - if (S.isUndefined()) + // If S is an undefined weak symbol in an executable we don't need a Thunk. + // In a DSO calls to undefined symbols, including weak ones get PLT entries + // which may need a thunk. + if (S.isUndefined() && !S.isLocal() && S.symbol()->isWeak() + && !Config->Shared) return Expr; // A state change from ARM to Thumb and vice versa must go through an // interworking thunk if the relocation type is not R_ARM_CALL or diff --git a/contrib/llvm/tools/lld/ELF/Thunks.cpp b/contrib/llvm/tools/lld/ELF/Thunks.cpp index 34b630ac2510..397a0ee66319 100644 --- a/contrib/llvm/tools/lld/ELF/Thunks.cpp +++ b/contrib/llvm/tools/lld/ELF/Thunks.cpp @@ -226,6 +226,8 @@ static void addThunkARM(uint32_t Reloc, SymbolBody &S, InputSection &IS) { Sym->ThunkData = T; else if (auto *Sym = dyn_cast>(&S)) Sym->ThunkData = T; + else if (auto *Sym = dyn_cast>(&S)) + Sym->ThunkData = T; else fatal("symbol not DefinedRegular or Shared"); } diff --git a/contrib/llvm/tools/lld/ELF/Writer.cpp b/contrib/llvm/tools/lld/ELF/Writer.cpp index e056384fbd44..154de8cf6d18 100644 --- a/contrib/llvm/tools/lld/ELF/Writer.cpp +++ b/contrib/llvm/tools/lld/ELF/Writer.cpp @@ -625,15 +625,12 @@ void PhdrEntry::add(OutputSectionBase *Sec) { } template -static Symbol *addOptionalSynthetic(StringRef Name, OutputSectionBase *Sec, - typename ELFT::uint Val, - uint8_t StOther = STV_HIDDEN) { - SymbolBody *S = Symtab::X->find(Name); - if (!S) - return nullptr; - if (!S->isUndefined() && !S->isShared()) - return S->symbol(); - return Symtab::X->addSynthetic(Name, Sec, Val, StOther); +static void addOptionalSynthetic(StringRef Name, OutputSectionBase *Sec, + typename ELFT::uint Val, + uint8_t StOther = STV_HIDDEN) { + if (SymbolBody *S = Symtab::X->find(Name)) + if (S->isUndefined() || S->isShared()) + Symtab::X->addSynthetic(Name, Sec, Val, StOther); } template @@ -1447,8 +1444,13 @@ template void Writer::setPhdrs() { } if (P.p_type == PT_LOAD) P.p_align = Config->MaxPageSize; - else if (P.p_type == PT_GNU_RELRO) + else if (P.p_type == PT_GNU_RELRO) { P.p_align = 1; + // The glibc dynamic loader rounds the size down, so we need to round up + // to protect the last page. This is a no-op on FreeBSD which always + // rounds up. + P.p_memsz = alignTo(P.p_memsz, Config->MaxPageSize); + } // The TLS pointer goes after PT_TLS. 
At least glibc will align it, // so round up the size to make sure the offsets are correct. diff --git a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp index 82473fba065b..ae907ac8dfbb 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.cpp @@ -2854,6 +2854,11 @@ bool RenderScriptRuntime::LoadModule(const lldb::ModuleSP &module_sp) { module_desc.reset(new RSModuleDescriptor(module_sp)); if (module_desc->ParseRSInfo()) { m_rsmodules.push_back(module_desc); + module_desc->WarnIfVersionMismatch(GetProcess() + ->GetTarget() + .GetDebugger() + .GetAsyncOutputStream() + .get()); module_loaded = true; } if (module_loaded) { @@ -2923,6 +2928,25 @@ void RenderScriptRuntime::Update() { } } +void RSModuleDescriptor::WarnIfVersionMismatch(lldb_private::Stream *s) const { + if (!s) + return; + + if (m_slang_version.empty() || m_bcc_version.empty()) { + s->PutCString("WARNING: Unknown bcc or slang (llvm-rs-cc) version; debug " + "experience may be unreliable"); + s->EOL(); + } else if (m_slang_version != m_bcc_version) { + s->Printf("WARNING: The debug info emitted by the slang frontend " + "(llvm-rs-cc) used to build this module (%s) does not match the " + "version of bcc used to generate the debug information (%s). " + "This is an unsupported configuration and may result in a poor " + "debugging experience; proceed with caution", + m_slang_version.c_str(), m_bcc_version.c_str()); + s->EOL(); + } +} + bool RSModuleDescriptor::ParsePragmaCount(llvm::StringRef *lines, size_t n_lines) { // Skip the pragma prototype line @@ -2990,6 +3014,22 @@ bool RSModuleDescriptor::ParseExportReduceCount(llvm::StringRef *lines, return true; } +bool RSModuleDescriptor::ParseVersionInfo(llvm::StringRef *lines, + size_t n_lines) { + // Skip the versionInfo line + ++lines; + for (; n_lines--; ++lines) { + // We're only interested in bcc and slang versions, and ignore all other + // versionInfo lines + const auto kv_pair = lines->split(" - "); + if (kv_pair.first == "slang") + m_slang_version = kv_pair.second.str(); + else if (kv_pair.first == "bcc") + m_bcc_version = kv_pair.second.str(); + } + return true; +} + bool RSModuleDescriptor::ParseExportForeachCount(llvm::StringRef *lines, size_t n_lines) { // Skip the exportForeachCount line @@ -3054,7 +3094,8 @@ bool RSModuleDescriptor::ParseRSInfo() { eExportReduce, ePragma, eBuildChecksum, - eObjectSlot + eObjectSlot, + eVersionInfo, }; const auto rs_info_handler = [](llvm::StringRef name) -> int { @@ -3070,6 +3111,7 @@ bool RSModuleDescriptor::ParseRSInfo() { // script .Case("pragmaCount", ePragma) .Case("objectSlotCount", eObjectSlot) + .Case("versionInfo", eVersionInfo) .Default(-1); }; @@ -3108,6 +3150,9 @@ bool RSModuleDescriptor::ParseRSInfo() { case ePragma: success = ParsePragmaCount(line, n_lines); break; + case eVersionInfo: + success = ParseVersionInfo(line, n_lines); + break; default: { if (log) log->Printf("%s - skipping .rs.info field '%s'", __FUNCTION__, diff --git a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h index 
a1211a2814b7..5b2bb57ac8c8 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h +++ b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptRuntime.h @@ -203,6 +203,11 @@ struct RSReductionDescriptor { }; class RSModuleDescriptor { + std::string m_slang_version; + std::string m_bcc_version; + + bool ParseVersionInfo(llvm::StringRef *, size_t n_lines); + bool ParseExportForeachCount(llvm::StringRef *, size_t n_lines); bool ParseExportVarCount(llvm::StringRef *, size_t n_lines); @@ -222,6 +227,8 @@ public: void Dump(Stream &strm) const; + void WarnIfVersionMismatch(Stream *s) const; + const lldb::ModuleSP m_module; std::vector m_kernels; std::vector m_globals; diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp index 2adc0d3f6c89..0143146b79ab 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp @@ -202,7 +202,6 @@ ELFLinuxPrStatus::ELFLinuxPrStatus() { Error ELFLinuxPrStatus::Parse(DataExtractor &data, ArchSpec &arch) { Error error; - ByteOrder byteorder = data.GetByteOrder(); if (GetSize(arch) > data.GetByteSize()) { error.SetErrorStringWithFormat( "NT_PRSTATUS size should be %zu, but the remaining bytes are: %" PRIu64, @@ -210,50 +209,36 @@ Error ELFLinuxPrStatus::Parse(DataExtractor &data, ArchSpec &arch) { return error; } - switch (arch.GetCore()) { -//case ArchSpec::eCore_s390x_generic: - case ArchSpec::eCore_x86_64_x86_64: - data.ExtractBytes(0, sizeof(ELFLinuxPrStatus), byteorder, this); - break; - case ArchSpec::eCore_x86_32_i386: - case ArchSpec::eCore_x86_32_i486: { - // Parsing from a 32 bit ELF core file, and populating/reusing the structure - // properly, because the struct is for the 64 bit version - offset_t offset = 0; - si_signo = data.GetU32(&offset); - si_code = data.GetU32(&offset); - si_errno = data.GetU32(&offset); + // Read field by field to correctly account for endianess + // of both the core dump and the platform running lldb. 
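On the comment above: reading each field through the extractor byte-swaps according to the core file's recorded byte order, which a whole-struct ExtractBytes/memcpy cannot do when the core and the host disagree. The idea reduced to a self-contained sketch (invented names; little-endian input for brevity):

#include <cstdint>

// Assemble a u32 from little-endian bytes, portably on any host.
uint32_t readU32LE(const uint8_t *buf) {
  return uint32_t(buf[0]) | uint32_t(buf[1]) << 8 |
         uint32_t(buf[2]) << 16 | uint32_t(buf[3]) << 24;
}

struct PrStatusHead { uint32_t si_signo, si_code, si_errno; };

PrStatusHead parseHead(const uint8_t *buf) {
  PrStatusHead h;
  h.si_signo = readU32LE(buf + 0); // field by field, as in the patch,
  h.si_code  = readU32LE(buf + 4); // rather than copying the whole struct
  h.si_errno = readU32LE(buf + 8);
  return h;
}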
+ offset_t offset = 0; + si_signo = data.GetU32(&offset); + si_code = data.GetU32(&offset); + si_errno = data.GetU32(&offset); - pr_cursig = data.GetU16(&offset); - offset += 2; // pad + pr_cursig = data.GetU16(&offset); + offset += 2; // pad - pr_sigpend = data.GetU32(&offset); - pr_sighold = data.GetU32(&offset); + pr_sigpend = data.GetPointer(&offset); + pr_sighold = data.GetPointer(&offset); - pr_pid = data.GetU32(&offset); - pr_ppid = data.GetU32(&offset); - pr_pgrp = data.GetU32(&offset); - pr_sid = data.GetU32(&offset); + pr_pid = data.GetU32(&offset); + pr_ppid = data.GetU32(&offset); + pr_pgrp = data.GetU32(&offset); + pr_sid = data.GetU32(&offset); - pr_utime.tv_sec = data.GetU32(&offset); - pr_utime.tv_usec = data.GetU32(&offset); + pr_utime.tv_sec = data.GetPointer(&offset); + pr_utime.tv_usec = data.GetPointer(&offset); - pr_stime.tv_sec = data.GetU32(&offset); - pr_stime.tv_usec = data.GetU32(&offset); + pr_stime.tv_sec = data.GetPointer(&offset); + pr_stime.tv_usec = data.GetPointer(&offset); - pr_cutime.tv_sec = data.GetU32(&offset); - pr_cutime.tv_usec = data.GetU32(&offset); + pr_cutime.tv_sec = data.GetPointer(&offset); + pr_cutime.tv_usec = data.GetPointer(&offset); - pr_cstime.tv_sec = data.GetU32(&offset); - pr_cstime.tv_usec = data.GetU32(&offset); + pr_cstime.tv_sec = data.GetPointer(&offset); + pr_cstime.tv_usec = data.GetPointer(&offset); - break; - } - default: - error.SetErrorStringWithFormat("ELFLinuxPrStatus::%s Unknown architecture", - __FUNCTION__); - break; - } return error; } @@ -274,49 +259,37 @@ Error ELFLinuxPrPsInfo::Parse(DataExtractor &data, ArchSpec &arch) { GetSize(arch), data.GetByteSize()); return error; } + size_t size = 0; + offset_t offset = 0; - switch (arch.GetCore()) { -//case ArchSpec::eCore_s390x_generic: - case ArchSpec::eCore_x86_64_x86_64: - data.ExtractBytes(0, sizeof(ELFLinuxPrPsInfo), byteorder, this); - break; - case ArchSpec::eCore_x86_32_i386: - case ArchSpec::eCore_x86_32_i486: { - // Parsing from a 32 bit ELF core file, and populating/reusing the structure - // properly, because the struct is for the 64 bit version - size_t size = 0; - offset_t offset = 0; - - pr_state = data.GetU8(&offset); - pr_sname = data.GetU8(&offset); - pr_zomb = data.GetU8(&offset); - pr_nice = data.GetU8(&offset); - - pr_flag = data.GetU32(&offset); - pr_uid = data.GetU16(&offset); - pr_gid = data.GetU16(&offset); - - pr_pid = data.GetU32(&offset); - pr_ppid = data.GetU32(&offset); - pr_pgrp = data.GetU32(&offset); - pr_sid = data.GetU32(&offset); - - size = 16; - data.ExtractBytes(offset, size, byteorder, pr_fname); - offset += size; - - size = 80; - data.ExtractBytes(offset, size, byteorder, pr_psargs); - offset += size; - - break; - } - default: - error.SetErrorStringWithFormat("ELFLinuxPrPsInfo::%s Unknown architecture", - __FUNCTION__); - break; + pr_state = data.GetU8(&offset); + pr_sname = data.GetU8(&offset); + pr_zomb = data.GetU8(&offset); + pr_nice = data.GetU8(&offset); + if (data.GetAddressByteSize() == 8) { + // Word align the next field on 64 bit. 
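The 4-byte skip above accounts for the padding an LP64 ABI inserts before the long-sized pr_flag field. A quick way to see the hole (illustrative):

#include <cstddef>
#include <cstdio>

struct PrPsInfo64 {
  char pr_state, pr_sname, pr_zomb, pr_nice;
  // 4 bytes of padding here on LP64: pr_flag needs 8-byte alignment
  unsigned long pr_flag;
};

int main() {
  std::printf("%zu\n", offsetof(PrPsInfo64, pr_flag)); // prints 8 on LP64, not 4
}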
+ offset += 4; } + pr_flag = data.GetPointer(&offset); + + // 16 bit on 32 bit platforms, 32 bit on 64 bit platforms + pr_uid = data.GetMaxU64(&offset, data.GetAddressByteSize() >> 1); + pr_gid = data.GetMaxU64(&offset, data.GetAddressByteSize() >> 1); + + pr_pid = data.GetU32(&offset); + pr_ppid = data.GetU32(&offset); + pr_pgrp = data.GetU32(&offset); + pr_sid = data.GetU32(&offset); + + size = 16; + data.ExtractBytes(offset, size, byteorder, pr_fname); + offset += size; + + size = 80; + data.ExtractBytes(offset, size, byteorder, pr_psargs); + offset += size; + return error; } @@ -329,7 +302,6 @@ ELFLinuxSigInfo::ELFLinuxSigInfo() { Error ELFLinuxSigInfo::Parse(DataExtractor &data, const ArchSpec &arch) { Error error; - ByteOrder byteorder = data.GetByteOrder(); if (GetSize(arch) > data.GetByteSize()) { error.SetErrorStringWithFormat( "NT_SIGINFO size should be %zu, but the remaining bytes are: %" PRIu64, @@ -337,27 +309,12 @@ Error ELFLinuxSigInfo::Parse(DataExtractor &data, const ArchSpec &arch) { return error; } - switch (arch.GetCore()) { - case ArchSpec::eCore_x86_64_x86_64: - data.ExtractBytes(0, sizeof(ELFLinuxPrStatus), byteorder, this); - break; -//case ArchSpec::eCore_s390x_generic: - case ArchSpec::eCore_x86_32_i386: - case ArchSpec::eCore_x86_32_i486: { - // Parsing from a 32 bit ELF core file, and populating/reusing the structure - // properly, because the struct is for the 64 bit version - offset_t offset = 0; - si_signo = data.GetU32(&offset); - si_code = data.GetU32(&offset); - si_errno = data.GetU32(&offset); - - break; - } - default: - error.SetErrorStringWithFormat("ELFLinuxSigInfo::%s Unknown architecture", - __FUNCTION__); - break; - } + // Parsing from a 32 bit ELF core file, and populating/reusing the structure + // properly, because the struct is for the 64 bit version + offset_t offset = 0; + si_signo = data.GetU32(&offset); + si_code = data.GetU32(&offset); + si_errno = data.GetU32(&offset); return error; } diff --git a/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp index f97a18448f0a..b84c4a83dee4 100644 --- a/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp +++ b/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp @@ -322,16 +322,15 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID, switch(CodeID) { default:return nullptr; STRINGIFY_CODE(METADATA, STRING_OLD) - STRINGIFY_CODE(METADATA, STRINGS) - STRINGIFY_CODE(METADATA, NAME) - STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK - STRINGIFY_CODE(METADATA, NODE) STRINGIFY_CODE(METADATA, VALUE) + STRINGIFY_CODE(METADATA, NODE) + STRINGIFY_CODE(METADATA, NAME) + STRINGIFY_CODE(METADATA, DISTINCT_NODE) + STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK + STRINGIFY_CODE(METADATA, LOCATION) STRINGIFY_CODE(METADATA, OLD_NODE) STRINGIFY_CODE(METADATA, OLD_FN_NODE) STRINGIFY_CODE(METADATA, NAMED_NODE) - STRINGIFY_CODE(METADATA, DISTINCT_NODE) - STRINGIFY_CODE(METADATA, LOCATION) STRINGIFY_CODE(METADATA, GENERIC_DEBUG) STRINGIFY_CODE(METADATA, SUBRANGE) STRINGIFY_CODE(METADATA, ENUMERATOR) @@ -353,6 +352,11 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID, STRINGIFY_CODE(METADATA, OBJC_PROPERTY) STRINGIFY_CODE(METADATA, IMPORTED_ENTITY) STRINGIFY_CODE(METADATA, MODULE) + STRINGIFY_CODE(METADATA, MACRO) + STRINGIFY_CODE(METADATA, MACRO_FILE) + STRINGIFY_CODE(METADATA, STRINGS) + STRINGIFY_CODE(METADATA, GLOBAL_DECL_ATTACHMENT) + STRINGIFY_CODE(METADATA, 
GLOBAL_VAR_EXPR) STRINGIFY_CODE(METADATA, INDEX_OFFSET) STRINGIFY_CODE(METADATA, INDEX) } diff --git a/contrib/llvm/tools/llvm-link/llvm-link.cpp b/contrib/llvm/tools/llvm-link/llvm-link.cpp index 43431ac3398a..e89696e7e7c2 100644 --- a/contrib/llvm/tools/llvm-link/llvm-link.cpp +++ b/contrib/llvm/tools/llvm-link/llvm-link.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/SystemUtils.h" #include "llvm/Support/ToolOutputFile.h" +#include "llvm/Transforms/IPO/FunctionImport.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" #include @@ -202,19 +203,20 @@ static void diagnosticHandler(const DiagnosticInfo &DI, void *C) { } /// Import any functions requested via the -import option. -static bool importFunctions(const char *argv0, LLVMContext &Context, - Linker &L) { +static bool importFunctions(const char *argv0, Module &DestModule) { if (SummaryIndex.empty()) return true; std::unique_ptr Index = ExitOnErr(llvm::getModuleSummaryIndexForFile(SummaryIndex)); // Map of Module -> List of globals to import from the Module - std::map> ModuleToGlobalsToImportMap; - auto ModuleLoader = [&Context](const char *argv0, - const std::string &Identifier) { - return loadFile(argv0, Identifier, Context, false); + FunctionImporter::ImportMapTy ImportList; + + auto ModuleLoader = [&DestModule](const char *argv0, + const std::string &Identifier) { + return loadFile(argv0, Identifier, DestModule.getContext(), false); }; + ModuleLazyLoaderCache ModuleLoaderCache(ModuleLoader); for (const auto &Import : Imports) { // Identify the requested function and its bitcode source file. @@ -253,35 +255,14 @@ static bool importFunctions(const char *argv0, LLVMContext &Context, if (Verbose) errs() << "Importing " << FunctionName << " from " << FileName << "\n"; - auto &Entry = ModuleToGlobalsToImportMap[SrcModule.getModuleIdentifier()]; - Entry.insert(F); - - ExitOnErr(F->materialize()); - } - - // Do the actual import of globals now, one Module at a time - for (auto &GlobalsToImportPerModule : ModuleToGlobalsToImportMap) { - // Get the module for the import - auto &GlobalsToImport = GlobalsToImportPerModule.second; - std::unique_ptr SrcModule = - ModuleLoaderCache.takeModule(GlobalsToImportPerModule.first); - assert(&Context == &SrcModule->getContext() && "Context mismatch"); - - // If modules were created with lazy metadata loading, materialize it - // now, before linking it (otherwise this will be a noop). - ExitOnErr(SrcModule->materializeMetadata()); - UpgradeDebugInfo(*SrcModule); - - // Linkage Promotion and renaming - if (renameModuleForThinLTO(*SrcModule, *Index, &GlobalsToImport)) - return true; - - // Instruct the linker to not automatically import linkonce defintion. - unsigned Flags = Linker::Flags::DontForceLinkLinkonceODR; - - if (L.linkInModule(std::move(SrcModule), Flags, &GlobalsToImport)) - return false; + auto &Entry = ImportList[FileName]; + Entry.insert(std::make_pair(F->getGUID(), /* (Unused) threshold */ 1.0)); } + auto CachedModuleLoader = [&](StringRef Identifier) { + return ModuleLoaderCache.takeModule(Identifier); + }; + FunctionImporter Importer(*Index, CachedModuleLoader); + ExitOnErr(Importer.importFunctions(DestModule, ImportList)); return true; } @@ -374,7 +355,7 @@ int main(int argc, char **argv) { return 1; // Import any functions requested via -import - if (!importFunctions(argv[0], Context, L)) + if (!importFunctions(argv[0], *Composite)) return 1; if (DumpAsm) errs() << "Here's the assembly:\n" << *Composite;
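The reworked importFunctions above hands the heavy lifting to FunctionImporter instead of driving the Linker directly. Its core flow, condensed from the patch itself (not independently buildable; Index, FileName, F, ModuleLoaderCache, and DestModule come from the surrounding llvm-link code):

FunctionImporter::ImportMapTy ImportList;
auto &Entry = ImportList[FileName];              // per-source-module import set
Entry.insert(std::make_pair(F->getGUID(),        // keyed by global GUID; the
                            /*threshold*/ 1.0)); // threshold is unused here

auto CachedModuleLoader = [&](StringRef Identifier) {
  return ModuleLoaderCache.takeModule(Identifier);
};
FunctionImporter Importer(*Index, CachedModuleLoader);
ExitOnErr(Importer.importFunctions(DestModule, ImportList));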