From 9c2f6c4bb805c7ac08c8925c96e429fcc322725e Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Fri, 14 Feb 2020 21:05:15 +0000 Subject: [PATCH 1/2] Remove unused utility script. --- clang/utils/convert_arm_neon.py | 172 -------------------------------- 1 file changed, 172 deletions(-) delete mode 100644 clang/utils/convert_arm_neon.py diff --git a/clang/utils/convert_arm_neon.py b/clang/utils/convert_arm_neon.py deleted file mode 100644 index c4b364529457..000000000000 --- a/clang/utils/convert_arm_neon.py +++ /dev/null @@ -1,172 +0,0 @@ -#!/usr/bin/env python3 - -# This script was committed on 20/11/2019 and it would probably make sense to remove -# it after the next release branches. - -# This script is pipe based and converts an arm_neon.td (or arm_fp16.td) file -# using the old single-char type modifiers to an equivalent new-style form where -# each modifier is orthogonal and they can be composed. -# -# It was used to directly generate the .td files on master, so if you have any -# local additions I would suggest implementing any modifiers here, and running -# it over your entire pre-merge .td files rather than trying to resolve any -# conflicts manually. - -import re, sys -MOD_MAP = { - 'v': 'v', - 'x': 'S', - 'u': 'U', - 'd': '.', - 'g': 'q', - 'j': 'Q', - 'w': '>Q', - 'n': '>', - 'h': '<', - 'q': '', - 's': '1', - 'z': '1<', - 'r': '1>', - 'b': '1U', - '$': '1S', - 'k': 'Q', - '2': '2', - '3': '3', - '4': '4', - 'B': '2Q', - 'C': '3Q', - 'D': '4Q', - 'p': '*', - 'c': 'c*', - '7': '< desired: - res += '<' - cur /= 2 - return res - - -def remap_protocol(proto, typespec, name): - key_type = 0 - - # Conversions like to see the integer type so they know signedness. - if 'vcvt' in name and '_f' in name and name != 'vcvt_f32_f64' and name != 'vcvt_f64_f32': - key_type = 1 - default_width = typespec_elt_size(typespec) - inconsistent_width = False - for elt in typespec: - new_width = typespec_elt_size(elt) - if new_width and new_width != default_width: - inconsistent_width = True - - res = '' - for i, c in enumerate(proto): - # void and pointers make for bad discriminators in CGBuiltin.cpp. - if c in 'vcp': - key_type += 1 - - if c in MOD_MAP: - cur_mod = MOD_MAP[c] - elif inconsistent_width: - # Otherwise it's a fixed output width modifier. 
-            sys.stderr.write(f'warning: {name} uses fixed output size but has inconsistent input widths: {proto} {typespec}\n')
-
-        if c == 'Y':
-            # y: scalar of half float
-            resize = get_resize(default_width, 16)
-            cur_mod = f'1F{resize}'
-        elif c == 'y':
-            # y: scalar of float
-            resize = get_resize(default_width, 32)
-            cur_mod = f'1F{resize}'
-        elif c == 'o':
-            # o: scalar of double
-            resize = get_resize(default_width, 64)
-            cur_mod = f'1F{resize}'
-        elif c == 'I':
-            # I: scalar of 32-bit signed
-            resize = get_resize(default_width, 32)
-            cur_mod = f'1S{resize}'
-        elif c == 'L':
-            # L: scalar of 64-bit signed
-            resize = get_resize(default_width, 64)
-            cur_mod = f'1S{resize}'
-        elif c == 'U':
-            # I: scalar of 32-bit unsigned
-            resize = get_resize(default_width, 32)
-            cur_mod = f'1U{resize}'
-        elif c == 'O':
-            # O: scalar of 64-bit unsigned
-            resize = get_resize(default_width, 64)
-            cur_mod = f'1U{resize}'
-        elif c == 'f':
-            # f: float (int args)
-            resize = get_resize(default_width, 32)
-            cur_mod = f'F{resize}'
-        elif c == 'F':
-            # F: double (int args)
-            resize = get_resize(default_width, 64)
-            cur_mod = f'F{resize}'
-        elif c == 'H':
-            # H: half (int args)
-            resize = get_resize(default_width, 16)
-            cur_mod = f'F{resize}'
-        elif c == '0':
-            # 0: half (int args), ignore 'Q' size modifier.
-            resize = get_resize(default_width, 16)
-            cur_mod = f'Fq{resize}'
-        elif c == '1':
-            # 1: half (int args), force 'Q' size modifier.
-            resize = get_resize(default_width, 16)
-            cur_mod = f'FQ{resize}'
-
-        if len(cur_mod) == 0:
-            raise Exception(f'WTF: {c} in {name}')
-
-        if key_type != 0 and key_type == i:
-            cur_mod += '!'
-
-        if len(cur_mod) == 1:
-            res += cur_mod
-        else:
-            res += '(' + cur_mod + ')'
-
-    return res
-
-def replace_insts(m):
-    start, end = m.span('proto')
-    start -= m.start()
-    end -= m.start()
-    new_proto = remap_protocol(m['proto'], m['kinds'], m['name'])
-    return m.group()[:start] + new_proto + m.group()[end:]
-
-INST = re.compile(r'Inst<"(?P<name>.*?)",\s*"(?P<proto>.*?)",\s*"(?P<kinds>.*?)"')
-
-new_td = INST.sub(replace_insts, sys.stdin.read())
-sys.stdout.write(new_td)

From d75c7debad4509ece98792074e64b8a650a27bdb Mon Sep 17 00:00:00 2001
From: Dimitry Andric
Date: Fri, 14 Feb 2020 21:24:03 +0000
Subject: [PATCH 2/2] Vendor import of llvm-project branch release/10.x
 llvmorg-10.0.0-rc2-0-g90c78073f73.
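Much of the Clang side of this import is C++20 concepts work: ConstraintSatisfaction becomes const-correct and cacheable, and the new Sema::CheckFunctionConstraints checks a function's trailing requires-clause at the point of use (see the SemaConcept.cpp and SemaExpr.cpp hunks below). A reduced, hypothetical example of the user-visible behavior; this is not a test from the patch:

```cpp
// Only templated functions may carry a trailing requires-clause; a member
// of a class template is the simplest case.
template <typename T>
struct Holder {
  void only_big() requires (sizeof(T) > 4) {}
};

void use() {
  Holder<double>{}.only_big();   // OK: sizeof(double) > 4.
  // Holder<char>{}.only_big(); // ill-formed: the trailing requires-clause
                                // is not satisfied, and Clang now diagnoses
                                // the unsatisfied constraint at this use.
}
```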
--- clang/include/clang/AST/ASTConcept.h | 6 +- clang/include/clang/AST/ExprConcepts.h | 13 ++ clang/include/clang/Basic/Cuda.h | 3 +- .../clang/Basic/DiagnosticDriverKinds.td | 3 + clang/include/clang/Basic/DiagnosticGroups.td | 11 +- .../clang/Basic/DiagnosticSemaKinds.td | 18 +- clang/include/clang/Driver/CC1Options.td | 2 + clang/include/clang/Driver/Job.h | 16 +- clang/include/clang/Lex/PreprocessorOptions.h | 3 + clang/include/clang/Sema/Sema.h | 15 +- clang/include/clang/Sema/SemaConcept.h | 12 +- clang/lib/AST/ASTConcept.cpp | 4 +- clang/lib/AST/ASTContext.cpp | 8 +- clang/lib/AST/CXXInheritance.cpp | 2 + clang/lib/AST/DeclCXX.cpp | 32 ++- clang/lib/AST/Expr.cpp | 5 + clang/lib/AST/ExprConcepts.cpp | 63 ++++-- clang/lib/AST/StmtProfile.cpp | 4 +- clang/lib/Basic/Cuda.cpp | 8 +- clang/lib/CodeGen/CodeGenModule.cpp | 7 + clang/lib/Driver/Compilation.cpp | 13 +- clang/lib/Driver/Driver.cpp | 5 + clang/lib/Driver/Job.cpp | 19 +- clang/lib/Driver/ToolChains/Clang.cpp | 7 +- clang/lib/Driver/ToolChains/Cuda.cpp | 41 ++-- clang/lib/Frontend/CompilerInvocation.cpp | 1 + clang/lib/Headers/__clang_cuda_intrinsics.h | 4 +- .../Headers/__clang_cuda_runtime_wrapper.h | 2 +- clang/lib/Headers/xmmintrin.h | 4 +- clang/lib/Lex/Lexer.cpp | 4 +- clang/lib/Lex/Pragma.cpp | 33 +-- clang/lib/Parse/ParseDecl.cpp | 11 +- clang/lib/Parse/ParseDeclCXX.cpp | 2 +- clang/lib/Parse/ParseExprCXX.cpp | 27 +-- clang/lib/Sema/SemaCast.cpp | 18 ++ clang/lib/Sema/SemaConcept.cpp | 70 +++--- clang/lib/Sema/SemaDecl.cpp | 1 + clang/lib/Sema/SemaDeclCXX.cpp | 54 ++++- clang/lib/Sema/SemaExpr.cpp | 49 ++-- clang/lib/Sema/SemaExprCXX.cpp | 3 +- clang/lib/Sema/SemaOverload.cpp | 88 ++++--- clang/lib/Sema/SemaTemplate.cpp | 6 +- clang/lib/Sema/SemaTemplateDeduction.cpp | 40 ++-- clang/lib/Sema/SemaTemplateInstantiate.cpp | 186 ++++++++++++++- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 96 ++++++-- clang/lib/Serialization/ASTReaderDecl.cpp | 2 +- .../StaticAnalyzer/Core/HTMLDiagnostics.cpp | 11 +- clang/tools/driver/cc1_main.cpp | 3 +- clang/tools/driver/cc1as_main.cpp | 3 +- .../lib/tsan/rtl/tsan_interceptors_mac.cpp | 9 +- libcxx/include/__config | 4 + lld/ELF/Arch/ARM.cpp | 8 +- lld/ELF/Arch/PPC.cpp | 37 +-- lld/ELF/InputSection.cpp | 8 + lld/ELF/Relocations.cpp | 60 +++-- lld/ELF/SyntheticSections.cpp | 16 +- lld/ELF/SyntheticSections.h | 8 +- lld/ELF/Thunks.cpp | 45 +++- lld/docs/ReleaseNotes.rst | 36 ++- lldb/source/DataFormatters/FormatCache.cpp | 4 + .../DataFormatters/LanguageCategory.cpp | 4 + lldb/source/Interpreter/CommandAlias.cpp | 3 +- lldb/source/Interpreter/Options.cpp | 4 +- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 162 +++++-------- .../SymbolFile/DWARF/DWARFASTParserClang.h | 35 +-- llvm/include/llvm/ADT/StringRef.h | 3 +- llvm/include/llvm/CodeGen/AsmPrinter.h | 3 +- .../llvm/ExecutionEngine/Orc/CompileUtils.h | 19 +- llvm/include/llvm/ExecutionEngine/Orc/Core.h | 13 +- .../llvm/ExecutionEngine/Orc/IRCompileLayer.h | 29 ++- llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h | 4 +- llvm/include/llvm/ExecutionEngine/Orc/Layer.h | 89 ++++---- .../llvm/ExecutionEngine/Orc/Speculation.h | 4 +- .../llvm/Support/CrashRecoveryContext.h | 3 +- llvm/include/llvm/Support/Process.h | 6 + .../llvm/Transforms/IPO/PassManagerBuilder.h | 28 ++- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 8 +- .../CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 4 +- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 24 +- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 3 - llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 9 +- 
llvm/lib/CodeGen/CodeGenPrepare.cpp | 12 +- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 2 +- .../CodeGen/GlobalISel/MachineIRBuilder.cpp | 16 +- llvm/lib/CodeGen/GlobalMerge.cpp | 2 + llvm/lib/CodeGen/LiveDebugVariables.cpp | 77 +++++-- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 12 +- .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 17 +- .../SelectionDAG/LegalizeVectorTypes.cpp | 4 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 28 +-- llvm/lib/CodeGen/StackColoring.cpp | 16 +- llvm/lib/CodeGen/TypePromotion.cpp | 12 +- .../Orc/CompileOnDemandLayer.cpp | 39 ++-- llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp | 33 ++- llvm/lib/ExecutionEngine/Orc/Core.cpp | 77 ++++--- .../ExecutionEngine/Orc/IRCompileLayer.cpp | 11 +- .../ExecutionEngine/Orc/IRTransformLayer.cpp | 8 +- llvm/lib/ExecutionEngine/Orc/LLJIT.cpp | 10 +- llvm/lib/ExecutionEngine/Orc/Layer.cpp | 51 ++++- .../Orc/RTDyldObjectLinkingLayer.cpp | 43 +++- llvm/lib/IR/AsmWriter.cpp | 4 +- llvm/lib/Linker/IRMover.cpp | 70 ++++-- llvm/lib/Support/CRC.cpp | 10 +- llvm/lib/Support/CrashRecoveryContext.cpp | 91 +++++--- llvm/lib/Support/ErrorHandling.cpp | 3 +- llvm/lib/Support/Process.cpp | 10 +- llvm/lib/Support/Windows/Signals.inc | 8 +- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 20 ++ .../AArch64/AArch64LoadStoreOptimizer.cpp | 7 + .../AArch64/AArch64TargetTransformInfo.h | 16 +- llvm/lib/Target/AMDGPU/AMDGPU.h | 3 - .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 2 - .../AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp | 79 ++++++- llvm/lib/Target/AMDGPU/CaymanInstructions.td | 4 +- .../Target/AMDGPU/EvergreenInstructions.td | 3 +- llvm/lib/Target/AMDGPU/R600Instructions.td | 7 +- llvm/lib/Target/AMDGPU/SIInsertSkips.cpp | 5 +- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 10 +- .../AMDGPU/SIRemoveShortExecBranches.cpp | 158 ------------- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 +- llvm/lib/Target/ARM/ARMConstantIslandPass.cpp | 2 +- llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 18 -- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 49 ---- llvm/lib/Target/ARM/ARMISelLowering.cpp | 57 +---- llvm/lib/Target/ARM/ARMISelLowering.h | 8 +- llvm/lib/Target/ARM/ARMInstrInfo.td | 27 --- llvm/lib/Target/ARM/ARMInstrThumb2.td | 9 +- llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 21 +- llvm/lib/Target/BPF/BPFISelLowering.h | 13 ++ .../lib/Target/BPF/BPFMISimplifyPatchable.cpp | 7 +- .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 5 +- llvm/lib/Target/RISCV/RISCV.td | 9 + llvm/lib/Target/RISCV/RISCVInstrFormats.td | 3 +- llvm/lib/Target/RISCV/RISCVInstrInfo.td | 109 +++++---- llvm/lib/Target/RISCV/RISCVInstrInfoA.td | 64 ++++-- llvm/lib/Target/RISCV/RISCVInstrInfoC.td | 156 ++++++++----- llvm/lib/Target/RISCV/RISCVInstrInfoD.td | 69 ++++-- llvm/lib/Target/RISCV/RISCVInstrInfoF.td | 72 ++++-- llvm/lib/Target/RISCV/RISCVInstrInfoM.td | 39 ++-- llvm/lib/Target/RISCV/RISCVSchedRocket32.td | 213 +++++++++++++++++ llvm/lib/Target/RISCV/RISCVSchedRocket64.td | 214 ++++++++++++++++++ llvm/lib/Target/RISCV/RISCVSchedule.td | 138 +++++++++++ llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 14 +- llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp | 24 +- llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h | 2 + .../Target/SystemZ/SystemZISelLowering.cpp | 5 +- .../WebAssemblyLowerEmscriptenEHSjLj.cpp | 5 +- .../Target/X86/MCTargetDesc/X86AsmBackend.cpp | 14 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- llvm/lib/Target/X86/X86MCInstLower.cpp | 19 ++ .../lib/Transforms/IPO/PassManagerBuilder.cpp | 41 +++- .../InstCombineLoadStoreAlloca.cpp | 5 + 
 .../InstCombine/InstCombineSelect.cpp         |   6 +
 .../InstCombine/InstructionCombining.cpp      |   3 +-
 .../Instrumentation/MemorySanitizer.cpp       |  43 ++++
 .../Vectorize/LoopVectorizationPlanner.h      |   7 +-
 .../Transforms/Vectorize/LoopVectorize.cpp    |  16 +-
 llvm/tools/lli/lli.cpp                        |  21 +-
 158 files changed, 2867 insertions(+), 1374 deletions(-)
 delete mode 100644 llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
 create mode 100644 llvm/lib/Target/RISCV/RISCVSchedRocket32.td
 create mode 100644 llvm/lib/Target/RISCV/RISCVSchedRocket64.td
 create mode 100644 llvm/lib/Target/RISCV/RISCVSchedule.td

diff --git a/clang/include/clang/AST/ASTConcept.h b/clang/include/clang/AST/ASTConcept.h
index 30c4706d2a15..3ebaad4eafdd 100644
--- a/clang/include/clang/AST/ASTConcept.h
+++ b/clang/include/clang/AST/ASTConcept.h
@@ -29,14 +29,14 @@ class ConceptSpecializationExpr;
 class ConstraintSatisfaction : public llvm::FoldingSetNode {
   // The template-like entity that 'owns' the constraint checked here (can be a
   // constrained entity or a concept).
-  NamedDecl *ConstraintOwner = nullptr;
+  const NamedDecl *ConstraintOwner = nullptr;
   llvm::SmallVector<TemplateArgument, 4> TemplateArgs;

 public:
   ConstraintSatisfaction() = default;

-  ConstraintSatisfaction(NamedDecl *ConstraintOwner,
+  ConstraintSatisfaction(const NamedDecl *ConstraintOwner,
                          ArrayRef<TemplateArgument> TemplateArgs)
       : ConstraintOwner(ConstraintOwner),
         TemplateArgs(TemplateArgs.begin(), TemplateArgs.end()) { }
@@ -57,7 +57,7 @@ class ConstraintSatisfaction : public llvm::FoldingSetNode {
   }

   static void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &C,
-                      NamedDecl *ConstraintOwner,
+                      const NamedDecl *ConstraintOwner,
                       ArrayRef<TemplateArgument> TemplateArgs);
 };

diff --git a/clang/include/clang/AST/ExprConcepts.h b/clang/include/clang/AST/ExprConcepts.h
index 2a64326e8604..271d487e2fc9 100644
--- a/clang/include/clang/AST/ExprConcepts.h
+++ b/clang/include/clang/AST/ExprConcepts.h
@@ -63,6 +63,12 @@ class ConceptSpecializationExpr final : public Expr, public ConceptReference,
                             ArrayRef<TemplateArgument> ConvertedArgs,
                             const ConstraintSatisfaction *Satisfaction);

+  ConceptSpecializationExpr(const ASTContext &C, ConceptDecl *NamedConcept,
+                            ArrayRef<TemplateArgument> ConvertedArgs,
+                            const ConstraintSatisfaction *Satisfaction,
+                            bool Dependent,
+                            bool ContainsUnexpandedParameterPack);
+
   ConceptSpecializationExpr(EmptyShell Empty, unsigned NumTemplateArgs);

 public:
@@ -75,6 +81,13 @@ class ConceptSpecializationExpr final : public Expr, public ConceptReference,
          ArrayRef<TemplateArgument> ConvertedArgs,
          const ConstraintSatisfaction *Satisfaction);

+  static ConceptSpecializationExpr *
+  Create(const ASTContext &C, ConceptDecl *NamedConcept,
+         ArrayRef<TemplateArgument> ConvertedArgs,
+         const ConstraintSatisfaction *Satisfaction,
+         bool Dependent,
+         bool ContainsUnexpandedParameterPack);
+
   static ConceptSpecializationExpr *
   Create(ASTContext &C, EmptyShell Empty, unsigned NumTemplateArgs);

diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index ef5d24dcf888..da572957d10d 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -11,6 +11,7 @@

 namespace llvm {
 class StringRef;
+class Twine;
 class VersionTuple;
 } // namespace llvm

@@ -30,7 +31,7 @@ enum class CudaVersion {
 };
 const char *CudaVersionToString(CudaVersion V);
 // Input is "Major.Minor"
-CudaVersion CudaStringToVersion(llvm::StringRef S);
+CudaVersion CudaStringToVersion(const llvm::Twine &S);

 enum class CudaArch {
   UNKNOWN,
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 2da41bef2669..ecd871e36ee8 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -60,6 +60,9 @@ def err_drv_cuda_version_unsupported : Error<
   "but installation at %3 is %4. Use --cuda-path to specify a different CUDA "
   "install, pass a different GPU arch with --cuda-gpu-arch, or pass "
   "--no-cuda-version-check.">;
+def warn_drv_unknown_cuda_version: Warning<
+  "Unknown CUDA version %0. Assuming the latest supported version %1">,
+  InGroup<CudaUnknownVersion>;
 def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host compilation.">;
 def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not supported.">;
 def err_drv_invalid_thread_model_for_target : Error<
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index a15fb908c537..5ad07915d2f5 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -384,7 +384,10 @@ def GNULabelsAsValue : DiagGroup<"gnu-label-as-value">;
 def LiteralRange : DiagGroup<"literal-range">;
 def LocalTypeTemplateArgs : DiagGroup<"local-type-template-args",
                                       [CXX98CompatLocalTypeTemplateArgs]>;
-def RangeLoopAnalysis : DiagGroup<"range-loop-analysis">;
+def RangeLoopConstruct : DiagGroup<"range-loop-construct">;
+def RangeLoopBindReference : DiagGroup<"range-loop-bind-reference">;
+def RangeLoopAnalysis : DiagGroup<"range-loop-analysis",
+    [RangeLoopConstruct, RangeLoopBindReference]>;
 def ForLoopAnalysis : DiagGroup<"for-loop-analysis">;
 def LoopAnalysis : DiagGroup<"loop-analysis", [ForLoopAnalysis,
                                                RangeLoopAnalysis]>;
@@ -858,14 +861,15 @@ def Most : DiagGroup<"most", [
     Comment,
     DeleteNonVirtualDtor,
     Format,
+    ForLoopAnalysis,
     Implicit,
     InfiniteRecursion,
     IntInBoolContext,
-    LoopAnalysis,
     MismatchedTags,
     MissingBraces,
     Move,
     MultiChar,
+    RangeLoopConstruct,
     Reorder,
     ReturnType,
     SelfAssignment,
@@ -1113,6 +1117,9 @@ def SerializedDiagnostics : DiagGroup<"serialized-diagnostics">;
 // compiling CUDA C/C++ but which is not compatible with the CUDA spec.
 def CudaCompat : DiagGroup<"cuda-compat">;

+// Warning about unknown CUDA SDK version.
+def CudaUnknownVersion: DiagGroup<"unknown-cuda-version">;
+
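The diag-group split above means -Wmost now pulls in only the copy-related half of the old -Wrange-loop-analysis (via RangeLoopConstruct), while the reference-binding half stays behind -Wrange-loop-bind-reference. A hypothetical pair of triggers, not taken from this patch:

```cpp
#include <string>
#include <vector>

void copies(const std::vector<std::string> &Words) {
  // -Wrange-loop-construct: W copies every element even though only a
  // const reference is needed; `const std::string &W` was probably intended.
  for (const std::string W : Words)
    (void)W;
}

void proxies(const std::vector<bool> &Bits) {
  // -Wrange-loop-bind-reference: vector<bool>'s iterator returns a proxy by
  // value, so B is always a copy despite the reference type.
  for (const auto &B : Bits)
    (void)B;
}
```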
 // A warning group for warnings about features supported by HIP but
 // ignored by CUDA.
 def HIPOnly : DiagGroup<"hip-only">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 7636d04a34c3..2199dfbddc84 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -2378,17 +2378,17 @@ def warn_for_range_const_reference_copy : Warning<
   "loop variable %0 "
   "%diff{has type $ but is initialized with type $"
   "| is initialized with a value of a different type}1,2 resulting in a copy">,
-  InGroup<RangeLoopAnalysis>, DefaultIgnore;
+  InGroup<RangeLoopConstruct>, DefaultIgnore;
 def note_use_type_or_non_reference : Note<
   "use non-reference type %0 to keep the copy or type %1 to prevent copying">;
 def warn_for_range_variable_always_copy : Warning<
   "loop variable %0 is always a copy because the range of type %1 does not "
   "return a reference">,
-  InGroup<RangeLoopAnalysis>, DefaultIgnore;
+  InGroup<RangeLoopBindReference>, DefaultIgnore;
 def note_use_non_reference_type : Note<"use non-reference type %0">;
 def warn_for_range_copy : Warning<
   "loop variable %0 of type %1 creates a copy from type %2">,
-  InGroup<RangeLoopAnalysis>, DefaultIgnore;
+  InGroup<RangeLoopConstruct>, DefaultIgnore;
 def note_use_reference_type : Note<"use reference type %0 to prevent copying">;
 def err_objc_for_range_init_stmt : Error<
   "initialization statement is not supported when iterating over Objective-C "
@@ -4683,6 +4683,8 @@ def note_checking_constraints_for_var_spec_id_here : Note<
 def note_checking_constraints_for_class_spec_id_here : Note<
   "while checking constraint satisfaction for class template partial "
   "specialization '%0' required here">;
+def note_checking_constraints_for_function_here : Note<
+  "while checking constraint satisfaction for function '%0' required here">;
 def note_constraint_substitution_here : Note<
   "while substituting template arguments into constraint expression here">;
 def note_constraint_normalization_here : Note<
@@ -6746,6 +6748,10 @@ def err_bad_cxx_cast_scalar_to_vector_different_size : Error<
 def err_bad_cxx_cast_vector_to_vector_different_size : Error<
   "%select{||reinterpret_cast||C-style cast|}0 from vector %1 "
   "to vector %2 of different size">;
+def warn_bad_cxx_cast_nested_pointer_addr_space : Warning<
+  "%select{reinterpret_cast|C-style cast}0 from %1 to %2 "
+  "changes address space of nested pointers">,
+  InGroup<IncompatiblePointerTypes>;
 def err_bad_lvalue_to_rvalue_cast : Error<
   "cannot cast from lvalue of type %1 to rvalue reference type %2; types are "
   "not compatible">;
@@ -8390,6 +8396,12 @@ def note_defaulted_comparison_cannot_deduce : Note<
   "return type of defaulted 'operator<=>' cannot be deduced because "
   "return type %2 of three-way comparison for %select{|member|base class}0 %1 "
   "is not a standard comparison category type">;
+def err_defaulted_comparison_cannot_deduce_undeduced_auto : Error<
+  "return type of defaulted 'operator<=>' cannot be deduced because "
+  "three-way comparison for %select{|member|base class}0 %1 "
+  "has a deduced return type and is not yet defined">;
+def note_defaulted_comparison_cannot_deduce_undeduced_auto : Note<
+  "%select{|member|base class}0 %1 declared here">;
 def note_defaulted_comparison_cannot_deduce_callee : Note<
   "selected 'operator<=>' for %select{|member|base class}0 %1 declared here">;
 def err_incorrect_defaulted_comparison_constexpr : Error<
diff --git a/clang/include/clang/Driver/CC1Options.td b/clang/include/clang/Driver/CC1Options.td
index 9387285518de..d1f5ec5a3d4c 100644
--- a/clang/include/clang/Driver/CC1Options.td
+++ b/clang/include/clang/Driver/CC1Options.td
@@ -859,6 +859,8 @@ def detailed_preprocessing_record :
Flag<["-"], "detailed-preprocessing-record"> HelpText<"include a detailed record of preprocessing actions">; def setup_static_analyzer : Flag<["-"], "setup-static-analyzer">, HelpText<"Set up preprocessor for static analyzer (done automatically when static analyzer is run).">; +def disable_pragma_debug_crash : Flag<["-"], "disable-pragma-debug-crash">, + HelpText<"Disable any #pragma clang __debug that can lead to crashing behavior. This is meant for testing.">; //===----------------------------------------------------------------------===// // OpenCL Options diff --git a/clang/include/clang/Driver/Job.h b/clang/include/clang/Driver/Job.h index 0765b3c67d4e..9a3cad23363b 100644 --- a/clang/include/clang/Driver/Job.h +++ b/clang/include/clang/Driver/Job.h @@ -55,9 +55,6 @@ class Command { /// The list of program arguments which are inputs. llvm::opt::ArgStringList InputFilenames; - /// Whether to print the input filenames when executing. - bool PrintInputFilenames = false; - /// Response file name, if this command is set to use one, or nullptr /// otherwise const char *ResponseFile = nullptr; @@ -86,6 +83,12 @@ class Command { void writeResponseFile(raw_ostream &OS) const; public: + /// Whether to print the input filenames when executing. + bool PrintInputFilenames = false; + + /// Whether the command will be executed in this process or not. + bool InProcess = false; + Command(const Action &Source, const Tool &Creator, const char *Executable, const llvm::opt::ArgStringList &Arguments, ArrayRef Inputs); @@ -128,9 +131,6 @@ class Command { /// Print a command argument, and optionally quote it. static void printArg(llvm::raw_ostream &OS, StringRef Arg, bool Quote); - /// Set whether to print the input filenames when executing. - void setPrintInputFilenames(bool P) { PrintInputFilenames = P; } - protected: /// Optionally print the filenames to be compiled void PrintFileNames() const; @@ -139,7 +139,9 @@ class Command { /// Use the CC1 tool callback when available, to avoid creating a new process class CC1Command : public Command { public: - using Command::Command; + CC1Command(const Action &Source, const Tool &Creator, const char *Executable, + const llvm::opt::ArgStringList &Arguments, + ArrayRef Inputs); void Print(llvm::raw_ostream &OS, const char *Terminator, bool Quote, CrashReportInfo *CrashInfo = nullptr) const override; diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h index abffbd03c3b4..8b2146059f85 100644 --- a/clang/include/clang/Lex/PreprocessorOptions.h +++ b/clang/include/clang/Lex/PreprocessorOptions.h @@ -189,6 +189,9 @@ class PreprocessorOptions { /// Set up preprocessor for RunAnalysis action. bool SetUpStaticAnalyzer = false; + /// Prevents intended crashes when using #pragma clang __debug. For testing. + bool DisablePragmaDebugCrash = false; + public: PreprocessorOptions() : PrecompiledPreambleBytes(0, false) {} diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index a88dd2814487..697d1911be8f 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -6275,7 +6275,7 @@ class Sema final { /// \returns true if an error occurred and satisfaction could not be checked, /// false otherwise. 
bool CheckConstraintSatisfaction( - NamedDecl *Template, ArrayRef ConstraintExprs, + const NamedDecl *Template, ArrayRef ConstraintExprs, ArrayRef TemplateArgs, SourceRange TemplateIDRange, ConstraintSatisfaction &Satisfaction); @@ -6288,6 +6288,17 @@ class Sema final { bool CheckConstraintSatisfaction(const Expr *ConstraintExpr, ConstraintSatisfaction &Satisfaction); + /// Check whether the given function decl's trailing requires clause is + /// satisfied, if any. Returns false and updates Satisfaction with the + /// satisfaction verdict if successful, emits a diagnostic and returns true if + /// an error occured and satisfaction could not be determined. + /// + /// \returns true if an error occurred, false otherwise. + bool CheckFunctionConstraints(const FunctionDecl *FD, + ConstraintSatisfaction &Satisfaction, + SourceLocation UsageLoc = SourceLocation()); + + /// \brief Ensure that the given template arguments satisfy the constraints /// associated with the given template, emitting a diagnostic if they do not. /// @@ -6986,7 +6997,7 @@ class Sema final { /// Get a template argument mapping the given template parameter to itself, /// e.g. for X in \c template, this would return an expression template /// argument referencing X. - TemplateArgumentLoc getIdentityTemplateArgumentLoc(Decl *Param, + TemplateArgumentLoc getIdentityTemplateArgumentLoc(NamedDecl *Param, SourceLocation Location); void translateTemplateArguments(const ASTTemplateArgsPtr &In, diff --git a/clang/include/clang/Sema/SemaConcept.h b/clang/include/clang/Sema/SemaConcept.h index 7fc42a4816ec..c5f9fc45612a 100644 --- a/clang/include/clang/Sema/SemaConcept.h +++ b/clang/include/clang/Sema/SemaConcept.h @@ -43,11 +43,15 @@ struct AtomicConstraint { if (ParameterMapping->size() != Other.ParameterMapping->size()) return false; - for (unsigned I = 0, S = ParameterMapping->size(); I < S; ++I) - if (!C.getCanonicalTemplateArgument((*ParameterMapping)[I].getArgument()) - .structurallyEquals(C.getCanonicalTemplateArgument( - (*Other.ParameterMapping)[I].getArgument()))) + for (unsigned I = 0, S = ParameterMapping->size(); I < S; ++I) { + llvm::FoldingSetNodeID IDA, IDB; + C.getCanonicalTemplateArgument((*ParameterMapping)[I].getArgument()) + .Profile(IDA, C); + C.getCanonicalTemplateArgument((*Other.ParameterMapping)[I].getArgument()) + .Profile(IDB, C); + if (IDA != IDB) return false; + } return true; } diff --git a/clang/lib/AST/ASTConcept.cpp b/clang/lib/AST/ASTConcept.cpp index c28a06bdf0b2..549088ad4a8a 100644 --- a/clang/lib/AST/ASTConcept.cpp +++ b/clang/lib/AST/ASTConcept.cpp @@ -59,8 +59,8 @@ ASTConstraintSatisfaction::Create(const ASTContext &C, } void ConstraintSatisfaction::Profile( - llvm::FoldingSetNodeID &ID, const ASTContext &C, NamedDecl *ConstraintOwner, - ArrayRef TemplateArgs) { + llvm::FoldingSetNodeID &ID, const ASTContext &C, + const NamedDecl *ConstraintOwner, ArrayRef TemplateArgs) { ID.AddPointer(ConstraintOwner); ID.AddInteger(TemplateArgs.size()); for (auto &Arg : TemplateArgs) diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 6d1db38e36cc..1be72efe4de8 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -756,12 +756,8 @@ canonicalizeImmediatelyDeclaredConstraint(const ASTContext &C, Expr *IDC, NewConverted.push_back(Arg); } Expr *NewIDC = ConceptSpecializationExpr::Create( - C, NestedNameSpecifierLoc(), /*TemplateKWLoc=*/SourceLocation(), - CSE->getConceptNameInfo(), /*FoundDecl=*/CSE->getNamedConcept(), - CSE->getNamedConcept(), - // Actually 
canonicalizing a TemplateArgumentLoc is difficult so we
-      // simply omit the ArgsAsWritten
-      /*ArgsAsWritten=*/nullptr, NewConverted, nullptr);
+      C, CSE->getNamedConcept(), NewConverted, nullptr,
+      CSE->isInstantiationDependent(), CSE->containsUnexpandedParameterPack());

   if (auto *OrigFold = dyn_cast<CXXFoldExpr>(IDC))
     NewIDC = new (C) CXXFoldExpr(OrigFold->getType(), SourceLocation(), NewIDC,
diff --git a/clang/lib/AST/CXXInheritance.cpp b/clang/lib/AST/CXXInheritance.cpp
index a3a3794b2edd..0377bd324cb6 100644
--- a/clang/lib/AST/CXXInheritance.cpp
+++ b/clang/lib/AST/CXXInheritance.cpp
@@ -758,6 +758,8 @@ CXXRecordDecl::getFinalOverriders(CXXFinalOverriderMap &FinalOverriders) const {
       return false;
     };

+    // FIXME: IsHidden reads from Overriding from the middle of a remove_if
+    // over the same sequence! Is this guaranteed to work?
     Overriding.erase(
         std::remove_if(Overriding.begin(), Overriding.end(), IsHidden),
         Overriding.end());
diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp
index 48e310e858b2..227fe80ccab4 100644
--- a/clang/lib/AST/DeclCXX.cpp
+++ b/clang/lib/AST/DeclCXX.cpp
@@ -2038,17 +2038,36 @@ CXXMethodDecl::getCorrespondingMethodInClass(const CXXRecordDecl *RD,
   if (auto *MD = getCorrespondingMethodDeclaredInClass(RD, MayBeBase))
     return MD;

+  llvm::SmallVector<CXXMethodDecl *, 4> FinalOverriders;
+  auto AddFinalOverrider = [&](CXXMethodDecl *D) {
+    // If this function is overridden by a candidate final overrider, it is not
+    // a final overrider.
+    for (CXXMethodDecl *OtherD : FinalOverriders) {
+      if (declaresSameEntity(D, OtherD) || recursivelyOverrides(OtherD, D))
+        return;
+    }
+
+    // Other candidate final overriders might be overridden by this function.
+    FinalOverriders.erase(
+        std::remove_if(FinalOverriders.begin(), FinalOverriders.end(),
+                       [&](CXXMethodDecl *OtherD) {
+                         return recursivelyOverrides(D, OtherD);
+                       }),
+        FinalOverriders.end());
+
+    FinalOverriders.push_back(D);
+  };
+
   for (const auto &I : RD->bases()) {
     const RecordType *RT = I.getType()->getAs<RecordType>();
     if (!RT)
       continue;
     const auto *Base = cast<CXXRecordDecl>(RT->getDecl());
-    CXXMethodDecl *T = this->getCorrespondingMethodInClass(Base);
-    if (T)
-      return T;
+    if (CXXMethodDecl *D = this->getCorrespondingMethodInClass(Base))
+      AddFinalOverrider(D);
   }

-  return nullptr;
+  return FinalOverriders.size() == 1 ? FinalOverriders.front() : nullptr;
 }

 CXXMethodDecl *CXXMethodDecl::Create(ASTContext &C, CXXRecordDecl *RD,
@@ -2105,6 +2124,11 @@ CXXMethodDecl *CXXMethodDecl::getDevirtualizedMethod(const Expr *Base,
   CXXMethodDecl *DevirtualizedMethod =
       getCorrespondingMethodInClass(BestDynamicDecl);

+  // If the final overrider in the dynamic type is ambiguous, we can't
+  // devirtualize this call.
+  if (!DevirtualizedMethod)
+    return nullptr;
+
   // If that method is pure virtual, we can't devirtualize. If this code is
   // reached, the result would be UB, not a direct call to the derived class
   // function, and we can't assume the derived class function is defined.
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 835198958766..fea7d606f261 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -1685,6 +1685,11 @@ MemberExpr *MemberExpr::Create(
     CXXRecordDecl *RD = dyn_cast_or_null<CXXRecordDecl>(DC);
     if (RD && RD->isDependentContext() && RD->isCurrentInstantiation(DC))
       E->setTypeDependent(T->isDependentType());
+
+    // Bitfield with value-dependent width is type-dependent.
+ FieldDecl *FD = dyn_cast(MemberDecl); + if (FD && FD->isBitField() && FD->getBitWidth()->isValueDependent()) + E->setTypeDependent(true); } if (HasQualOrFound) { diff --git a/clang/lib/AST/ExprConcepts.cpp b/clang/lib/AST/ExprConcepts.cpp index 76d57ed5d5b1..b5a3686dc99a 100644 --- a/clang/lib/AST/ExprConcepts.cpp +++ b/clang/lib/AST/ExprConcepts.cpp @@ -46,24 +46,12 @@ ConceptSpecializationExpr::ConceptSpecializationExpr(const ASTContext &C, ASTConstraintSatisfaction::Create(C, *Satisfaction) : nullptr) { setTemplateArguments(ConvertedArgs); -} - -ConceptSpecializationExpr::ConceptSpecializationExpr(EmptyShell Empty, - unsigned NumTemplateArgs) - : Expr(ConceptSpecializationExprClass, Empty), ConceptReference(), - NumTemplateArgs(NumTemplateArgs) { } - -void ConceptSpecializationExpr::setTemplateArguments( - ArrayRef Converted) { - assert(Converted.size() == NumTemplateArgs); - std::uninitialized_copy(Converted.begin(), Converted.end(), - getTrailingObjects()); bool IsInstantiationDependent = false; bool ContainsUnexpandedParameterPack = false; - for (const TemplateArgument& Arg : Converted) { - if (Arg.isInstantiationDependent()) + for (const TemplateArgumentLoc& ArgLoc : ArgsAsWritten->arguments()) { + if (ArgLoc.getArgument().isInstantiationDependent()) IsInstantiationDependent = true; - if (Arg.containsUnexpandedParameterPack()) + if (ArgLoc.getArgument().containsUnexpandedParameterPack()) ContainsUnexpandedParameterPack = true; if (ContainsUnexpandedParameterPack && IsInstantiationDependent) break; @@ -80,6 +68,18 @@ void ConceptSpecializationExpr::setTemplateArguments( "should not be value-dependent"); } +ConceptSpecializationExpr::ConceptSpecializationExpr(EmptyShell Empty, + unsigned NumTemplateArgs) + : Expr(ConceptSpecializationExprClass, Empty), ConceptReference(), + NumTemplateArgs(NumTemplateArgs) { } + +void ConceptSpecializationExpr::setTemplateArguments( + ArrayRef Converted) { + assert(Converted.size() == NumTemplateArgs); + std::uninitialized_copy(Converted.begin(), Converted.end(), + getTrailingObjects()); +} + ConceptSpecializationExpr * ConceptSpecializationExpr::Create(const ASTContext &C, NestedNameSpecifierLoc NNS, @@ -98,6 +98,39 @@ ConceptSpecializationExpr::Create(const ASTContext &C, ConvertedArgs, Satisfaction); } +ConceptSpecializationExpr::ConceptSpecializationExpr( + const ASTContext &C, ConceptDecl *NamedConcept, + ArrayRef ConvertedArgs, + const ConstraintSatisfaction *Satisfaction, bool Dependent, + bool ContainsUnexpandedParameterPack) + : Expr(ConceptSpecializationExprClass, C.BoolTy, VK_RValue, OK_Ordinary, + /*TypeDependent=*/false, + /*ValueDependent=*/!Satisfaction, Dependent, + ContainsUnexpandedParameterPack), + ConceptReference(NestedNameSpecifierLoc(), SourceLocation(), + DeclarationNameInfo(), NamedConcept, + NamedConcept, nullptr), + NumTemplateArgs(ConvertedArgs.size()), + Satisfaction(Satisfaction ? 
+          ASTConstraintSatisfaction::Create(C, *Satisfaction) :
+          nullptr) {
+  setTemplateArguments(ConvertedArgs);
+}
+
+ConceptSpecializationExpr *
+ConceptSpecializationExpr::Create(const ASTContext &C,
+                                  ConceptDecl *NamedConcept,
+                                  ArrayRef<TemplateArgument> ConvertedArgs,
+                                  const ConstraintSatisfaction *Satisfaction,
+                                  bool Dependent,
+                                  bool ContainsUnexpandedParameterPack) {
+  void *Buffer = C.Allocate(totalSizeToAlloc<TemplateArgument>(
+      ConvertedArgs.size()));
+  return new (Buffer) ConceptSpecializationExpr(
+      C, NamedConcept, ConvertedArgs, Satisfaction, Dependent,
+      ContainsUnexpandedParameterPack);
+}
+
 ConceptSpecializationExpr *
 ConceptSpecializationExpr::Create(ASTContext &C, EmptyShell Empty,
                                   unsigned NumTemplateArgs) {
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 382ea5c8d7ef..60dec50d53da 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -1535,8 +1535,8 @@ static Stmt::StmtClass DecodeOperatorCall(const CXXOperatorCallExpr *S,
     return Stmt::BinaryOperatorClass;

   case OO_Spaceship:
-    // FIXME: Update this once we support <=> expressions.
-    llvm_unreachable("<=> expressions not supported yet");
+    BinaryOp = BO_Cmp;
+    return Stmt::BinaryOperatorClass;

   case OO_AmpAmp:
     BinaryOp = BO_LAnd;
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index f2b6c8cd3ee9..e06d120c58bf 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -2,6 +2,7 @@

 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/VersionTuple.h"

@@ -31,8 +32,8 @@ const char *CudaVersionToString(CudaVersion V) {
   llvm_unreachable("invalid enum");
 }

-CudaVersion CudaStringToVersion(llvm::StringRef S) {
-  return llvm::StringSwitch<CudaVersion>(S)
+CudaVersion CudaStringToVersion(const llvm::Twine &S) {
+  return llvm::StringSwitch<CudaVersion>(S.str())
       .Case("7.0", CudaVersion::CUDA_70)
       .Case("7.5", CudaVersion::CUDA_75)
       .Case("8.0", CudaVersion::CUDA_80)
@@ -40,7 +41,8 @@
       .Case("9.1", CudaVersion::CUDA_91)
       .Case("9.2", CudaVersion::CUDA_92)
       .Case("10.0", CudaVersion::CUDA_100)
-      .Case("10.1", CudaVersion::CUDA_101);
+      .Case("10.1", CudaVersion::CUDA_101)
+      .Default(CudaVersion::UNKNOWN);
 }

 const char *CudaArchToString(CudaArch A) {
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 57beda26677c..f8866ac4f7f6 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -537,6 +537,13 @@ void CodeGenModule::Release() {
     getModule().addModuleFlag(llvm::Module::Error, "min_enum_size", EnumWidth);
   }

+  if (Arch == llvm::Triple::riscv32 || Arch == llvm::Triple::riscv64) {
+    StringRef ABIStr = Target.getABI();
+    llvm::LLVMContext &Ctx = TheModule.getContext();
+    getModule().addModuleFlag(llvm::Module::Error, "target-abi",
+                              llvm::MDString::get(Ctx, ABIStr));
+  }
+
   if (CodeGenOpts.SanitizeCfiCrossDso) {
     // Indicate that we want cross-DSO control flow integrity checks.
     getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1);
diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp
index 25aec3690f21..52477576b2eb 100644
--- a/clang/lib/Driver/Compilation.cpp
+++ b/clang/lib/Driver/Compilation.cpp
@@ -258,14 +258,23 @@ void Compilation::initCompilationForDiagnostics() {

   // Remove any user specified output. Claim any unclaimed arguments, so as
   // to avoid emitting warnings about unused args.
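The CodeGenModule.cpp hunk above records the RISC-V ABI in a "target-abi" module flag; the RISC-V backend changes in this import read it back when configuring the target machine. A minimal sketch of a consumer, assuming the LLVM 10 module-flag API; the helper name is invented (the Compilation.cpp diff resumes below):

```cpp
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"

// Returns the "target-abi" module flag as a string, or "" if absent.
llvm::StringRef readTargetABI(const llvm::Module &M) {
  if (auto *MD = llvm::dyn_cast_or_null<llvm::MDString>(
          M.getModuleFlag("target-abi")))
    return MD->getString();
  return "";
}
```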
- OptSpecifier OutputOpts[] = { options::OPT_o, options::OPT_MD, - options::OPT_MMD }; + OptSpecifier OutputOpts[] = { + options::OPT_o, options::OPT_MD, options::OPT_MMD, options::OPT_M, + options::OPT_MM, options::OPT_MF, options::OPT_MG, options::OPT_MJ, + options::OPT_MQ, options::OPT_MT, options::OPT_MV}; for (unsigned i = 0, e = llvm::array_lengthof(OutputOpts); i != e; ++i) { if (TranslatedArgs->hasArg(OutputOpts[i])) TranslatedArgs->eraseArg(OutputOpts[i]); } TranslatedArgs->ClaimAllArgs(); + // Force re-creation of the toolchain Args, otherwise our modifications just + // above will have no effect. + for (auto Arg : TCArgs) + if (Arg.second != TranslatedArgs) + delete Arg.second; + TCArgs.clear(); + // Redirect stdout/stderr to /dev/null. Redirects = {None, {""}, {""}}; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 7ee3caaa0bce..fb8335a3695d 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3757,6 +3757,11 @@ void Driver::BuildJobs(Compilation &C) const { /*TargetDeviceOffloadKind*/ Action::OFK_None); } + // If we have more than one job, then disable integrated-cc1 for now. + if (C.getJobs().size() > 1) + for (auto &J : C.getJobs()) + J.InProcess = false; + // If the user passed -Qunused-arguments or there were errors, don't warn // about any unused arguments. if (Diags.hasErrorOccurred() || diff --git a/clang/lib/Driver/Job.cpp b/clang/lib/Driver/Job.cpp index 7dab2a022d92..6d1e7e61ba1d 100644 --- a/clang/lib/Driver/Job.cpp +++ b/clang/lib/Driver/Job.cpp @@ -371,14 +371,29 @@ int Command::Execute(ArrayRef> Redirects, /*memoryLimit*/ 0, ErrMsg, ExecutionFailed); } +CC1Command::CC1Command(const Action &Source, const Tool &Creator, + const char *Executable, + const llvm::opt::ArgStringList &Arguments, + ArrayRef Inputs) + : Command(Source, Creator, Executable, Arguments, Inputs) { + InProcess = true; +} + void CC1Command::Print(raw_ostream &OS, const char *Terminator, bool Quote, CrashReportInfo *CrashInfo) const { - OS << " (in-process)\n"; + if (InProcess) + OS << " (in-process)\n"; Command::Print(OS, Terminator, Quote, CrashInfo); } -int CC1Command::Execute(ArrayRef> /*Redirects*/, +int CC1Command::Execute(ArrayRef> Redirects, std::string *ErrMsg, bool *ExecutionFailed) const { + // FIXME: Currently, if there're more than one job, we disable + // -fintegrate-cc1. If we're no longer a integrated-cc1 job, fallback to + // out-of-process execution. See discussion in https://reviews.llvm.org/D74447 + if (!InProcess) + return Command::Execute(Redirects, ErrMsg, ExecutionFailed); + PrintFileNames(); SmallVector Argv; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 647465863d3e..aec1971214cf 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4679,6 +4679,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, : "-"); } + // Give the gen diagnostics more chances to succeed, by avoiding intentional + // crashes. 
+ if (D.CCGenDiagnostics) + CmdArgs.push_back("-disable-pragma-debug-crash"); + bool UseSeparateSections = isUseSeparateSections(Triple); if (Args.hasFlag(options::OPT_ffunction_sections, @@ -6048,7 +6053,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Output.getType() == types::TY_Object && Args.hasFlag(options::OPT__SLASH_showFilenames, options::OPT__SLASH_showFilenames_, false)) { - C.getJobs().getJobs().back()->setPrintInputFilenames(true); + C.getJobs().getJobs().back()->PrintInputFilenames = true; } if (Arg *A = Args.getLastArg(options::OPT_pg)) diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 02871d2ce411..8a7da4f86b39 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -32,37 +32,24 @@ using namespace llvm::opt; // Parses the contents of version.txt in an CUDA installation. It should // contain one line of the from e.g. "CUDA Version 7.5.2". -static CudaVersion ParseCudaVersionFile(llvm::StringRef V) { +static CudaVersion ParseCudaVersionFile(const Driver &D, llvm::StringRef V) { if (!V.startswith("CUDA Version ")) return CudaVersion::UNKNOWN; V = V.substr(strlen("CUDA Version ")); - int Major = -1, Minor = -1; - auto First = V.split('.'); - auto Second = First.second.split('.'); - if (First.first.getAsInteger(10, Major) || - Second.first.getAsInteger(10, Minor)) + SmallVector VersionParts; + V.split(VersionParts, '.'); + if (VersionParts.size() < 2) return CudaVersion::UNKNOWN; + std::string MajorMinor = join_items(".", VersionParts[0], VersionParts[1]); + CudaVersion Version = CudaStringToVersion(MajorMinor); + if (Version != CudaVersion::UNKNOWN) + return Version; - if (Major == 7 && Minor == 0) { - // This doesn't appear to ever happen -- version.txt doesn't exist in the - // CUDA 7 installs I've seen. But no harm in checking. - return CudaVersion::CUDA_70; - } - if (Major == 7 && Minor == 5) - return CudaVersion::CUDA_75; - if (Major == 8 && Minor == 0) - return CudaVersion::CUDA_80; - if (Major == 9 && Minor == 0) - return CudaVersion::CUDA_90; - if (Major == 9 && Minor == 1) - return CudaVersion::CUDA_91; - if (Major == 9 && Minor == 2) - return CudaVersion::CUDA_92; - if (Major == 10 && Minor == 0) - return CudaVersion::CUDA_100; - if (Major == 10 && Minor == 1) - return CudaVersion::CUDA_101; - return CudaVersion::UNKNOWN; + // Issue a warning and assume that the version we've found is compatible with + // the latest version we support. + D.Diag(diag::warn_drv_unknown_cuda_version) + << MajorMinor << CudaVersionToString(CudaVersion::LATEST); + return CudaVersion::LATEST; } CudaInstallationDetector::CudaInstallationDetector( @@ -160,7 +147,7 @@ CudaInstallationDetector::CudaInstallationDetector( // version.txt isn't present. 
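The rewritten ParseCudaVersionFile above keys only on the "major.minor" prefix of version.txt and degrades unknown versions to a warning plus CudaVersion::LATEST instead of a hard failure (its use resumes just below). The same split-and-prefix idea as a self-contained sketch; parseMajorMinor is an invented helper, not the patch's code:

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include <cstring>
#include <string>

// Extracts "10.1" from "CUDA Version 10.1.243"; returns false on malformed
// input so the caller can fall back to a default.
bool parseMajorMinor(llvm::StringRef V, std::string &MajorMinor) {
  if (!V.startswith("CUDA Version "))
    return false;
  V = V.substr(strlen("CUDA Version "));
  llvm::SmallVector<llvm::StringRef, 4> Parts;
  V.split(Parts, '.');
  if (Parts.size() < 2)
    return false;
  MajorMinor = (Parts[0] + "." + Parts[1]).str();
  return true;
}
```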
Version = CudaVersion::CUDA_70; } else { - Version = ParseCudaVersionFile((*VersionFile)->getBuffer()); + Version = ParseCudaVersionFile(D, (*VersionFile)->getBuffer()); } if (Version >= CudaVersion::CUDA_90) { diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 4e5babdbaa03..e98a407ac42f 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3440,6 +3440,7 @@ static void ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args, Opts.LexEditorPlaceholders = false; Opts.SetUpStaticAnalyzer = Args.hasArg(OPT_setup_static_analyzer); + Opts.DisablePragmaDebugCrash = Args.hasArg(OPT_disable_pragma_debug_crash); } static void ParsePreprocessorOutputArgs(PreprocessorOutputOptions &Opts, diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h index b67461a146fc..c7bff6a9d8fe 100644 --- a/clang/lib/Headers/__clang_cuda_intrinsics.h +++ b/clang/lib/Headers/__clang_cuda_intrinsics.h @@ -45,7 +45,7 @@ _Static_assert(sizeof(__val) == sizeof(__Bits)); \ _Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \ __Bits __tmp; \ - memcpy(&__val, &__tmp, sizeof(__val)); \ + memcpy(&__tmp, &__val, sizeof(__val)); \ __tmp.__a = ::__FnName(__tmp.__a, __offset, __width); \ __tmp.__b = ::__FnName(__tmp.__b, __offset, __width); \ long long __ret; \ @@ -129,7 +129,7 @@ __MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f, _Static_assert(sizeof(__val) == sizeof(__Bits)); \ _Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \ __Bits __tmp; \ - memcpy(&__val, &__tmp, sizeof(__val)); \ + memcpy(&__tmp, &__val, sizeof(__val)); \ __tmp.__a = ::__FnName(__mask, __tmp.__a, __offset, __width); \ __tmp.__b = ::__FnName(__mask, __tmp.__b, __offset, __width); \ long long __ret; \ diff --git a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h index 3e362dd967db..e91de3c81dbd 100644 --- a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h +++ b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h @@ -48,7 +48,7 @@ #include "cuda.h" #if !defined(CUDA_VERSION) #error "cuda.h did not define CUDA_VERSION" -#elif CUDA_VERSION < 7000 || CUDA_VERSION > 10010 +#elif CUDA_VERSION < 7000 #error "Unsupported CUDA version!" #endif diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index 0e61eab44aeb..9b8de63f04d5 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -2181,7 +2181,7 @@ void _mm_sfence(void); /// 3: Bits [63:48] are copied to the destination. /// \returns A 16-bit integer containing the extracted 16 bits of packed data. #define _mm_extract_pi16(a, n) \ - (int)__builtin_ia32_vec_ext_v4hi((__m64)a, (int)n) + (int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n) /// Copies data from the 64-bit vector of [4 x i16] to the destination, /// and inserts the lower 16-bits of an integer operand at the 16-bit offset @@ -2212,7 +2212,7 @@ void _mm_sfence(void); /// \returns A 64-bit integer vector containing the copied packed data from the /// operands. 
#define _mm_insert_pi16(a, d, n) \ - (__m64)__builtin_ia32_vec_set_v4hi((__m64)a, (int)d, (int)n) + (__m64)__builtin_ia32_vec_set_v4hi((__v4hi)a, (int)d, (int)n) /// Compares each of the corresponding packed 16-bit integer values of /// the 64-bit integer vectors, and writes the greater value to the diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 648bda270578..981111d03744 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2552,8 +2552,8 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr, '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/' }; - while (CurPtr+16 <= BufferEnd && - !vec_any_eq(*(const vector unsigned char*)CurPtr, Slashes)) + while (CurPtr + 16 <= BufferEnd && + !vec_any_eq(*(const __vector unsigned char *)CurPtr, Slashes)) CurPtr += 16; #else // Scan for '/' quickly. Many block comments are very large. diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp index e4636265a72b..a8cd18b123b0 100644 --- a/clang/lib/Lex/Pragma.cpp +++ b/clang/lib/Lex/Pragma.cpp @@ -30,6 +30,7 @@ #include "clang/Lex/PPCallbacks.h" #include "clang/Lex/Preprocessor.h" #include "clang/Lex/PreprocessorLexer.h" +#include "clang/Lex/PreprocessorOptions.h" #include "clang/Lex/Token.h" #include "clang/Lex/TokenLexer.h" #include "llvm/ADT/ArrayRef.h" @@ -39,7 +40,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/CrashRecoveryContext.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include @@ -1035,15 +1035,19 @@ struct PragmaDebugHandler : public PragmaHandler { IdentifierInfo *II = Tok.getIdentifierInfo(); if (II->isStr("assert")) { - llvm_unreachable("This is an assertion!"); + if (!PP.getPreprocessorOpts().DisablePragmaDebugCrash) + llvm_unreachable("This is an assertion!"); } else if (II->isStr("crash")) { - LLVM_BUILTIN_TRAP; + if (!PP.getPreprocessorOpts().DisablePragmaDebugCrash) + LLVM_BUILTIN_TRAP; } else if (II->isStr("parser_crash")) { - Token Crasher; - Crasher.startToken(); - Crasher.setKind(tok::annot_pragma_parser_crash); - Crasher.setAnnotationRange(SourceRange(Tok.getLocation())); - PP.EnterToken(Crasher, /*IsReinject*/false); + if (!PP.getPreprocessorOpts().DisablePragmaDebugCrash) { + Token Crasher; + Crasher.startToken(); + Crasher.setKind(tok::annot_pragma_parser_crash); + Crasher.setAnnotationRange(SourceRange(Tok.getLocation())); + PP.EnterToken(Crasher, /*IsReinject*/ false); + } } else if (II->isStr("dump")) { Token Identifier; PP.LexUnexpandedToken(Identifier); @@ -1075,9 +1079,11 @@ struct PragmaDebugHandler : public PragmaHandler { << II->getName(); } } else if (II->isStr("llvm_fatal_error")) { - llvm::report_fatal_error("#pragma clang __debug llvm_fatal_error"); + if (!PP.getPreprocessorOpts().DisablePragmaDebugCrash) + llvm::report_fatal_error("#pragma clang __debug llvm_fatal_error"); } else if (II->isStr("llvm_unreachable")) { - llvm_unreachable("#pragma clang __debug llvm_unreachable"); + if (!PP.getPreprocessorOpts().DisablePragmaDebugCrash) + llvm_unreachable("#pragma clang __debug llvm_unreachable"); } else if (II->isStr("macro")) { Token MacroName; PP.LexUnexpandedToken(MacroName); @@ -1104,11 +1110,8 @@ struct PragmaDebugHandler : public PragmaHandler { } M->dump(); } else if (II->isStr("overflow_stack")) { - DebugOverflowStack(); - } else if (II->isStr("handle_crash")) { - llvm::CrashRecoveryContext *CRC =llvm::CrashRecoveryContext::GetCurrent(); - if (CRC) - 
CRC->HandleCrash(); + if (!PP.getPreprocessorOpts().DisablePragmaDebugCrash) + DebugOverflowStack(); } else if (II->isStr("captured")) { HandleCaptured(PP); } else { diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 4af993c4527f..cdc3506d5c68 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -5060,6 +5060,8 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { // recurse to handle whatever we get. if (TryAnnotateTypeOrScopeToken()) return true; + if (TryAnnotateTypeConstraint()) + return true; if (Tok.is(tok::identifier)) return false; @@ -5192,11 +5194,14 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { // placeholder-type-specifier case tok::annot_template_id: { - TemplateIdAnnotation *TemplateId = takeTemplateIdAnnotation(Tok); - return TemplateId->Kind == TNK_Concept_template && + return isTypeConstraintAnnotation() && (NextToken().is(tok::kw_auto) || NextToken().is(tok::kw_decltype)); } - + case tok::annot_cxxscope: + if (NextToken().is(tok::identifier) && TryAnnotateTypeConstraint()) + return true; + return isTypeConstraintAnnotation() && + GetLookAheadToken(2).isOneOf(tok::kw_auto, tok::kw_decltype); case tok::kw___declspec: case tok::kw___cdecl: case tok::kw___stdcall: diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index f872aa3a950c..09e5c7996fcd 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -2716,7 +2716,7 @@ Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS, // C++11 [dcl.attr.grammar] p4: If an attribute-specifier-seq appertains // to a friend declaration, that declaration shall be a definition. if (DeclaratorInfo.isFunctionDeclarator() && - DefinitionKind != FDK_Definition && DS.isFriendSpecified()) { + DefinitionKind == FDK_Declaration && DS.isFriendSpecified()) { // Diagnose attributes that appear before decl specifier: // [[]] friend int foo(); ProhibitAttributes(FnAttrs); diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index 036eabb94dd7..17f81ec96c1f 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -3374,25 +3374,6 @@ ExprResult Parser::ParseRequiresExpression() { Diag(Tok, diag::err_requires_expr_missing_arrow) << FixItHint::CreateInsertion(Tok.getLocation(), "->"); // Try to parse a 'type-constraint' - CXXScopeSpec SS; - if (ParseOptionalCXXScopeSpecifier(SS, ParsedType(), - /*EnteringContext=*/false, - /*MayBePseudoDestructor=*/nullptr, - // If this is not a type-constraint, - // then this scope-spec is part of - // the typename of a non-type - // template parameter - /*IsTypename=*/true, - /*LastII=*/nullptr, - // We won't find concepts in - // non-namespaces anyway, so might as - // well parse this correctly for - // possible type names. 
- /*OnlyNamespace=*/false, - /*SuppressDiagnostic=*/true)) { - SkipUntil(tok::semi, tok::r_brace, SkipUntilFlags::StopBeforeMatch); - break; - } if (TryAnnotateTypeConstraint()) { SkipUntil(tok::semi, tok::r_brace, SkipUntilFlags::StopBeforeMatch); break; @@ -3402,8 +3383,13 @@ ExprResult Parser::ParseRequiresExpression() { SkipUntil(tok::semi, tok::r_brace, SkipUntilFlags::StopBeforeMatch); break; } - if (Tok.is(tok::annot_cxxscope)) + CXXScopeSpec SS; + if (Tok.is(tok::annot_cxxscope)) { + Actions.RestoreNestedNameSpecifierAnnotation(Tok.getAnnotationValue(), + Tok.getAnnotationRange(), + SS); ConsumeAnnotationToken(); + } Req = Actions.ActOnCompoundRequirement( Expression.get(), NoexceptLoc, SS, takeTemplateIdAnnotation(Tok), @@ -3490,6 +3476,7 @@ ExprResult Parser::ParseRequiresExpression() { // We need to consume the typename to allow 'requires { typename a; }' SourceLocation TypenameKWLoc = ConsumeToken(); if (TryAnnotateCXXScopeToken()) { + TPA.Commit(); SkipUntil(tok::semi, tok::r_brace, SkipUntilFlags::StopBeforeMatch); break; } diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index a905ebc67305..7a8cbca1e3f1 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -2311,6 +2311,24 @@ static TryCastResult TryReinterpretCast(Sema &Self, ExprResult &SrcExpr, return SuccessResult; } + // Diagnose address space conversion in nested pointers. + QualType DestPtee = DestType->getPointeeType().isNull() + ? DestType->getPointeeType() + : DestType->getPointeeType()->getPointeeType(); + QualType SrcPtee = SrcType->getPointeeType().isNull() + ? SrcType->getPointeeType() + : SrcType->getPointeeType()->getPointeeType(); + while (!DestPtee.isNull() && !SrcPtee.isNull()) { + if (DestPtee.getAddressSpace() != SrcPtee.getAddressSpace()) { + Self.Diag(OpRange.getBegin(), + diag::warn_bad_cxx_cast_nested_pointer_addr_space) + << CStyle << SrcType << DestType << SrcExpr.get()->getSourceRange(); + break; + } + DestPtee = DestPtee->getPointeeType(); + SrcPtee = SrcPtee->getPointeeType(); + } + // C++ 5.2.10p7: A pointer to an object can be explicitly converted to // a pointer to an object of different type. // Void pointers are not specified, but supported by every compiler out there. diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index 81601b09ce0d..290e4cbff4fd 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -167,9 +167,8 @@ calculateConstraintSatisfaction(Sema &S, const Expr *ConstraintExpr, return false; } -template static bool calculateConstraintSatisfaction( - Sema &S, TemplateDeclT *Template, ArrayRef TemplateArgs, + Sema &S, const NamedDecl *Template, ArrayRef TemplateArgs, SourceLocation TemplateNameLoc, MultiLevelTemplateArgumentList &MLTAL, const Expr *ConstraintExpr, ConstraintSatisfaction &Satisfaction) { return calculateConstraintSatisfaction( @@ -182,8 +181,9 @@ static bool calculateConstraintSatisfaction( { TemplateDeductionInfo Info(TemplateNameLoc); Sema::InstantiatingTemplate Inst(S, AtomicExpr->getBeginLoc(), - Sema::InstantiatingTemplate::ConstraintSubstitution{}, Template, - Info, AtomicExpr->getSourceRange()); + Sema::InstantiatingTemplate::ConstraintSubstitution{}, + const_cast(Template), Info, + AtomicExpr->getSourceRange()); if (Inst.isInvalid()) return ExprError(); // We do not want error diagnostics escaping here. 
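The SemaCast.cpp hunk above walks matching pointee levels and warns when a nested level changes address space. A hypothetical trigger using Clang's address_space extension; this is not one of the patch's tests (the SemaConcept.cpp diff continues below):

```cpp
typedef int __attribute__((address_space(1))) AS1Int;

void demo(AS1Int **PP) {
  // The outer pointer conversion only changes its pointee type; it is the
  // *nested* pointee (AS1Int vs. plain int) that changes address space,
  // which the new warn_bad_cxx_cast_nested_pointer_addr_space reports.
  int **Plain = reinterpret_cast<int **>(PP);
  (void)Plain;
}
```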
@@ -230,8 +230,7 @@ static bool calculateConstraintSatisfaction(
       });
 }

-template <typename TemplateDeclT>
-static bool CheckConstraintSatisfaction(Sema &S, TemplateDeclT *Template,
+static bool CheckConstraintSatisfaction(Sema &S, const NamedDecl *Template,
                                         ArrayRef<const Expr *> ConstraintExprs,
                                         ArrayRef<TemplateArgument> TemplateArgs,
                                         SourceRange TemplateIDRange,
@@ -249,8 +248,8 @@ static bool CheckConstraintSatisfaction(Sema &S, TemplateDeclT *Template,
   }

   Sema::InstantiatingTemplate Inst(S, TemplateIDRange.getBegin(),
-      Sema::InstantiatingTemplate::ConstraintsCheck{}, Template, TemplateArgs,
-      TemplateIDRange);
+      Sema::InstantiatingTemplate::ConstraintsCheck{},
+      const_cast<NamedDecl *>(Template), TemplateArgs, TemplateIDRange);
   if (Inst.isInvalid())
     return true;

@@ -273,7 +272,7 @@ static bool CheckConstraintSatisfaction(Sema &S, TemplateDeclT *Template,
 }

 bool Sema::CheckConstraintSatisfaction(
-    NamedDecl *Template, ArrayRef<const Expr *> ConstraintExprs,
+    const NamedDecl *Template, ArrayRef<const Expr *> ConstraintExprs,
     ArrayRef<TemplateArgument> TemplateArgs, SourceRange TemplateIDRange,
     ConstraintSatisfaction &OutSatisfaction) {
   if (ConstraintExprs.empty()) {
@@ -284,7 +283,8 @@ bool Sema::CheckConstraintSatisfaction(
   llvm::FoldingSetNodeID ID;
   void *InsertPos;
   ConstraintSatisfaction *Satisfaction = nullptr;
-  if (LangOpts.ConceptSatisfactionCaching) {
+  bool ShouldCache = LangOpts.ConceptSatisfactionCaching && Template;
+  if (ShouldCache) {
     ConstraintSatisfaction::Profile(ID, Context, Template, TemplateArgs);
     Satisfaction = SatisfactionCache.FindNodeOrInsertPos(ID, InsertPos);
     if (Satisfaction) {
@@ -295,27 +295,15 @@ bool Sema::CheckConstraintSatisfaction(
   } else {
     Satisfaction = &OutSatisfaction;
   }
-  bool Failed;
-  if (auto *T = dyn_cast<TemplateDecl>(Template))
-    Failed = ::CheckConstraintSatisfaction(*this, T, ConstraintExprs,
-                                           TemplateArgs, TemplateIDRange,
-                                           *Satisfaction);
-  else if (auto *P =
-               dyn_cast<ClassTemplatePartialSpecializationDecl>(Template))
-    Failed = ::CheckConstraintSatisfaction(*this, P, ConstraintExprs,
-                                           TemplateArgs, TemplateIDRange,
-                                           *Satisfaction);
-  else
-    Failed = ::CheckConstraintSatisfaction(
-        *this, cast<VarTemplatePartialSpecializationDecl>(Template),
-        ConstraintExprs, TemplateArgs, TemplateIDRange, *Satisfaction);
-  if (Failed) {
-    if (LangOpts.ConceptSatisfactionCaching)
+  if (::CheckConstraintSatisfaction(*this, Template, ConstraintExprs,
+                                    TemplateArgs, TemplateIDRange,
+                                    *Satisfaction)) {
+    if (ShouldCache)
       delete Satisfaction;
     return true;
   }
-  if (LangOpts.ConceptSatisfactionCaching) {
+  if (ShouldCache) {
     // We cannot use InsertNode here because CheckConstraintSatisfaction might
     // have invalidated it.
     SatisfactionCache.InsertNode(Satisfaction);
@@ -333,6 +321,30 @@ bool Sema::CheckConstraintSatisfaction(const Expr *ConstraintExpr,
       });
 }

+bool Sema::CheckFunctionConstraints(const FunctionDecl *FD,
+                                    ConstraintSatisfaction &Satisfaction,
+                                    SourceLocation UsageLoc) {
+  const Expr *RC = FD->getTrailingRequiresClause();
+  if (RC->isInstantiationDependent()) {
+    Satisfaction.IsSatisfied = true;
+    return false;
+  }
+  Qualifiers ThisQuals;
+  CXXRecordDecl *Record = nullptr;
+  if (auto *Method = dyn_cast<CXXMethodDecl>(FD)) {
+    ThisQuals = Method->getMethodQualifiers();
+    Record = const_cast<CXXRecordDecl *>(Method->getParent());
+  }
+  CXXThisScopeRAII ThisScope(*this, Record, ThisQuals, Record != nullptr);
+  // We substitute with empty arguments in order to rebuild the atomic
+  // constraint in a constant-evaluated context.
+  // FIXME: Should this be a dedicated TreeTransform?
+  return CheckConstraintSatisfaction(
+      FD, {RC}, /*TemplateArgs=*/{},
+      SourceRange(UsageLoc.isValid() ?
UsageLoc : FD->getLocation()), + Satisfaction); +} + bool Sema::EnsureTemplateArgumentListConstraints( TemplateDecl *TD, ArrayRef TemplateArgs, SourceRange TemplateIDRange) { @@ -671,6 +683,10 @@ static bool substituteParameterMappings(Sema &S, NormalizedConstraint &N, ArgsAsWritten->arguments().back().getSourceRange().getEnd())); if (S.SubstTemplateArguments(*Atomic.ParameterMapping, MLTAL, SubstArgs)) return true; + Atomic.ParameterMapping.emplace( + MutableArrayRef( + new (S.Context) TemplateArgumentLoc[SubstArgs.size()], + SubstArgs.size())); std::copy(SubstArgs.arguments().begin(), SubstArgs.arguments().end(), N.getAtomicConstraint()->ParameterMapping->begin()); return false; diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 0bf490336537..64146f4a912f 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -12526,6 +12526,7 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) { var->getDeclContext()->getRedeclContext()->isFileContext() && var->isExternallyVisible() && var->hasLinkage() && !var->isInline() && !var->getDescribedVarTemplate() && + !isa(var) && !isTemplateInstantiation(var->getTemplateSpecializationKind()) && !getDiagnostics().isIgnored(diag::warn_missing_variable_declarations, var->getLocation())) { diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 9fa5691983a1..831e55046e80 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -7373,7 +7373,14 @@ class DefaultedComparisonAnalyzer /// resolution [...] CandidateSet.exclude(FD); - S.LookupOverloadedBinOp(CandidateSet, OO, Fns, Args); + if (Args[0]->getType()->isOverloadableType()) + S.LookupOverloadedBinOp(CandidateSet, OO, Fns, Args); + else { + // FIXME: We determine whether this is a valid expression by checking to + // see if there's a viable builtin operator candidate for it. That isn't + // really what the rules ask us to do, but should give the right results. + S.AddBuiltinOperatorCandidates(OO, FD->getLocation(), Args, CandidateSet); + } Result R; @@ -7438,6 +7445,31 @@ class DefaultedComparisonAnalyzer if (OO == OO_Spaceship && FD->getReturnType()->isUndeducedAutoType()) { if (auto *BestFD = Best->Function) { + // If any callee has an undeduced return type, deduce it now. + // FIXME: It's not clear how a failure here should be handled. For + // now, we produce an eager diagnostic, because that is forward + // compatible with most (all?) other reasonable options. + if (BestFD->getReturnType()->isUndeducedType() && + S.DeduceReturnType(BestFD, FD->getLocation(), + /*Diagnose=*/false)) { + // Don't produce a duplicate error when asked to explain why the + // comparison is deleted: we diagnosed that when initially checking + // the defaulted operator. 
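A source-level view of the case the AddBuiltinOperatorCandidates fallback above is for: the subobject comparison below is int <=> int, which has no user-declared operator for overload lookup to find, so validity has to be judged against the builtin candidates.

#include <compare>

struct P {
  int x, y;
  // The synthesized body compares x and y with the builtin <=> on int.
  friend auto operator<=>(const P &, const P &) = default;
};

static_assert(P{1, 2} <=> P{1, 3} < 0); // <=> binds tighter than <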
+ if (Diagnose == NoDiagnostics) { + S.Diag( + FD->getLocation(), + diag::err_defaulted_comparison_cannot_deduce_undeduced_auto) + << Subobj.Kind << Subobj.Decl; + S.Diag( + Subobj.Loc, + diag::note_defaulted_comparison_cannot_deduce_undeduced_auto) + << Subobj.Kind << Subobj.Decl; + S.Diag(BestFD->getLocation(), + diag::note_defaulted_comparison_cannot_deduce_callee) + << Subobj.Kind << Subobj.Decl; + } + return Result::deleted(); + } if (auto *Info = S.Context.CompCategories.lookupInfoForType( BestFD->getCallResultType())) { R.Category = Info->Kind; @@ -7826,10 +7858,14 @@ class DefaultedComparisonSynthesizer return StmtError(); OverloadedOperatorKind OO = FD->getOverloadedOperator(); - ExprResult Op = S.CreateOverloadedBinOp( - Loc, BinaryOperator::getOverloadedOpcode(OO), Fns, - Obj.first.get(), Obj.second.get(), /*PerformADL=*/true, - /*AllowRewrittenCandidates=*/true, FD); + BinaryOperatorKind Opc = BinaryOperator::getOverloadedOpcode(OO); + ExprResult Op; + if (Type->isOverloadableType()) + Op = S.CreateOverloadedBinOp(Loc, Opc, Fns, Obj.first.get(), + Obj.second.get(), /*PerformADL=*/true, + /*AllowRewrittenCandidates=*/true, FD); + else + Op = S.CreateBuiltinBinOp(Loc, Opc, Obj.first.get(), Obj.second.get()); if (Op.isInvalid()) return StmtError(); @@ -7869,8 +7905,12 @@ class DefaultedComparisonSynthesizer llvm::APInt ZeroVal(S.Context.getIntWidth(S.Context.IntTy), 0); Expr *Zero = IntegerLiteral::Create(S.Context, ZeroVal, S.Context.IntTy, Loc); - ExprResult Comp = S.CreateOverloadedBinOp(Loc, BO_NE, Fns, VDRef.get(), - Zero, true, true, FD); + ExprResult Comp; + if (VDRef.get()->getType()->isOverloadableType()) + Comp = S.CreateOverloadedBinOp(Loc, BO_NE, Fns, VDRef.get(), Zero, true, + true, FD); + else + Comp = S.CreateBuiltinBinOp(Loc, BO_NE, VDRef.get(), Zero); if (Comp.isInvalid()) return StmtError(); Sema::ConditionResult Cond = S.ActOnCondition( diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index ea4b93ee6a5a..29562615e588 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -245,8 +245,8 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef Locs, return true; } - // See if this is a deleted function. if (FunctionDecl *FD = dyn_cast(D)) { + // See if this is a deleted function. if (FD->isDeleted()) { auto *Ctor = dyn_cast(FD); if (Ctor && Ctor->isInheritingConstructor()) @@ -259,6 +259,29 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef Locs, return true; } + // [expr.prim.id]p4 + // A program that refers explicitly or implicitly to a function with a + // trailing requires-clause whose constraint-expression is not satisfied, + // other than to declare it, is ill-formed. [...] + // + // See if this is a function with constraints that need to be satisfied. + // Check this before deducing the return type, as it might instantiate the + // definition. + if (FD->getTrailingRequiresClause()) { + ConstraintSatisfaction Satisfaction; + if (CheckFunctionConstraints(FD, Satisfaction, Loc)) + // A diagnostic will have already been generated (non-constant + // constraint expression, for example) + return true; + if (!Satisfaction.IsSatisfied) { + Diag(Loc, + diag::err_reference_to_function_with_unsatisfied_constraints) + << D; + DiagnoseUnsatisfiedConstraint(Satisfaction); + return true; + } + } + // If the function has a deduced return type, and we can't deduce it, // then we can't use it either. 
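The relocated constraint check above enforces [expr.prim.id]p4: merely naming a constrained function with unsatisfied constraints is ill-formed, and the check has to run before return-type deduction because deduction can instantiate the definition. An illustrative source-level case (diagnostic wording may vary):

template <typename T>
auto twice(T v) requires (sizeof(T) >= 4) { return v + v; }

void use() {
  twice(1);                  // OK where sizeof(int) >= 4; may instantiate twice
  // auto *p = &twice<char>; // ill-formed: refers to a function whose
  //                         // requires-clause is not satisfied
}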
if (getLangOpts().CPlusPlus14 && FD->getReturnType()->isUndeducedType() && @@ -326,30 +349,6 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef Locs, diagnoseUseOfInternalDeclInInlineFunction(*this, D, Loc); - // [expr.prim.id]p4 - // A program that refers explicitly or implicitly to a function with a - // trailing requires-clause whose constraint-expression is not satisfied, - // other than to declare it, is ill-formed. [...] - // - // See if this is a function with constraints that need to be satisfied. - if (FunctionDecl *FD = dyn_cast(D)) { - if (Expr *RC = FD->getTrailingRequiresClause()) { - ConstraintSatisfaction Satisfaction; - bool Failed = CheckConstraintSatisfaction(RC, Satisfaction); - if (Failed) - // A diagnostic will have already been generated (non-constant - // constraint expression, for example) - return true; - if (!Satisfaction.IsSatisfied) { - Diag(Loc, - diag::err_reference_to_function_with_unsatisfied_constraints) - << D; - DiagnoseUnsatisfiedConstraint(Satisfaction); - return true; - } - } - } - if (isa(D) && isa(D->getDeclContext()) && !isUnevaluatedContext()) { // C++ [expr.prim.req.nested] p3 diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 192c237b6c1c..98af7fb73eca 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -8487,7 +8487,8 @@ concepts::NestedRequirement * Sema::BuildNestedRequirement(Expr *Constraint) { ConstraintSatisfaction Satisfaction; if (!Constraint->isInstantiationDependent() && - CheckConstraintSatisfaction(Constraint, Satisfaction)) + CheckConstraintSatisfaction(nullptr, {Constraint}, /*TemplateArgs=*/{}, + Constraint->getSourceRange(), Satisfaction)) return nullptr; return new (Context) concepts::NestedRequirement(Context, Constraint, Satisfaction); diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 0fd932fac970..db1884acd349 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -3176,7 +3176,7 @@ static bool isNonTrivialObjCLifetimeConversion(Qualifiers FromQuals, /// FromType and \p ToType is permissible, given knowledge about whether every /// outer layer is const-qualified. static bool isQualificationConversionStep(QualType FromType, QualType ToType, - bool CStyle, + bool CStyle, bool IsTopLevel, bool &PreviousToQualsIncludeConst, bool &ObjCLifetimeConversion) { Qualifiers FromQuals = FromType.getQualifiers(); @@ -3213,11 +3213,15 @@ static bool isQualificationConversionStep(QualType FromType, QualType ToType, if (!CStyle && !ToQuals.compatiblyIncludes(FromQuals)) return false; - // For a C-style cast, just require the address spaces to overlap. - // FIXME: Does "superset" also imply the representation of a pointer is the - // same? We're assuming that it does here and in compatiblyIncludes. - if (CStyle && !ToQuals.isAddressSpaceSupersetOf(FromQuals) && - !FromQuals.isAddressSpaceSupersetOf(ToQuals)) + // If address spaces mismatch: + // - in top level it is only valid to convert to addr space that is a + // superset in all cases apart from C-style casts where we allow + // conversions between overlapping address spaces. + // - in non-top levels it is not a valid conversion. 
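In source terms (using Clang's address_space extension; which spaces overlap is target-defined), the rule spelled out in the comment above means a cast may still reconcile the outermost pointer's address space, while a mismatch at any deeper level is never a qualification conversion:

typedef int __attribute__((address_space(1))) as1_int;

void f(as1_int **pp) {
  // Top level: converting the outer pointer between overlapping spaces can
  // still be allowed (C-style casts only).
  // Nested level: 'as1_int **' -> 'int **' changes the inner pointer's
  // address space, so it is no longer a qualification conversion:
  // int **qq = (int **)pp; // now diagnosed
  (void)pp;
}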
+ if (ToQuals.getAddressSpace() != FromQuals.getAddressSpace() && + (!IsTopLevel || + !(ToQuals.isAddressSpaceSupersetOf(FromQuals) || + (CStyle && FromQuals.isAddressSpaceSupersetOf(ToQuals))))) return false; // -- if the cv 1,j and cv 2,j are different, then const is in @@ -3258,9 +3262,9 @@ Sema::IsQualificationConversion(QualType FromType, QualType ToType, bool PreviousToQualsIncludeConst = true; bool UnwrappedAnyPointer = false; while (Context.UnwrapSimilarTypes(FromType, ToType)) { - if (!isQualificationConversionStep(FromType, ToType, CStyle, - PreviousToQualsIncludeConst, - ObjCLifetimeConversion)) + if (!isQualificationConversionStep( + FromType, ToType, CStyle, !UnwrappedAnyPointer, + PreviousToQualsIncludeConst, ObjCLifetimeConversion)) return false; UnwrappedAnyPointer = true; } @@ -4499,7 +4503,7 @@ Sema::CompareReferenceRelationship(SourceLocation Loc, // If we find a qualifier mismatch, the types are not reference-compatible, // but are still be reference-related if they're similar. bool ObjCLifetimeConversion = false; - if (!isQualificationConversionStep(T2, T1, /*CStyle=*/false, + if (!isQualificationConversionStep(T2, T1, /*CStyle=*/false, TopLevel, PreviousToQualsIncludeConst, ObjCLifetimeConversion)) return (ConvertedReferent || Context.hasSimilarType(T1, T2)) @@ -6291,9 +6295,9 @@ void Sema::AddOverloadCandidate( return; } - if (Expr *RequiresClause = Function->getTrailingRequiresClause()) { + if (Function->getTrailingRequiresClause()) { ConstraintSatisfaction Satisfaction; - if (CheckConstraintSatisfaction(RequiresClause, Satisfaction) || + if (CheckFunctionConstraints(Function, Satisfaction) || !Satisfaction.IsSatisfied) { Candidate.Viable = false; Candidate.FailureKind = ovl_fail_constraints_not_satisfied; @@ -6808,9 +6812,9 @@ Sema::AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl, return; } - if (Expr *RequiresClause = Method->getTrailingRequiresClause()) { + if (Method->getTrailingRequiresClause()) { ConstraintSatisfaction Satisfaction; - if (CheckConstraintSatisfaction(RequiresClause, Satisfaction) || + if (CheckFunctionConstraints(Method, Satisfaction) || !Satisfaction.IsSatisfied) { Candidate.Viable = false; Candidate.FailureKind = ovl_fail_constraints_not_satisfied; @@ -7204,10 +7208,9 @@ void Sema::AddConversionCandidate( return; } - Expr *RequiresClause = Conversion->getTrailingRequiresClause(); - if (RequiresClause) { + if (Conversion->getTrailingRequiresClause()) { ConstraintSatisfaction Satisfaction; - if (CheckConstraintSatisfaction(RequiresClause, Satisfaction) || + if (CheckFunctionConstraints(Conversion, Satisfaction) || !Satisfaction.IsSatisfied) { Candidate.Viable = false; Candidate.FailureKind = ovl_fail_constraints_not_satisfied; @@ -9270,17 +9273,31 @@ Sema::AddArgumentDependentLookupCandidates(DeclarationName Name, if (ExplicitTemplateArgs) continue; - AddOverloadCandidate(FD, FoundDecl, Args, CandidateSet, - /*SuppressUserConversions=*/false, PartialOverloading, - /*AllowExplicit*/ true, - /*AllowExplicitConversions*/ false, - ADLCallKind::UsesADL); + AddOverloadCandidate( + FD, FoundDecl, Args, CandidateSet, /*SuppressUserConversions=*/false, + PartialOverloading, /*AllowExplicit=*/true, + /*AllowExplicitConversions=*/false, ADLCallKind::UsesADL); + if (CandidateSet.getRewriteInfo().shouldAddReversed(Context, FD)) { + AddOverloadCandidate( + FD, FoundDecl, {Args[1], Args[0]}, CandidateSet, + /*SuppressUserConversions=*/false, PartialOverloading, + /*AllowExplicit=*/true, /*AllowExplicitConversions=*/false, + 
ADLCallKind::UsesADL, None, OverloadCandidateParamOrder::Reversed); + } } else { + auto *FTD = cast(*I); AddTemplateOverloadCandidate( - cast(*I), FoundDecl, ExplicitTemplateArgs, Args, - CandidateSet, + FTD, FoundDecl, ExplicitTemplateArgs, Args, CandidateSet, /*SuppressUserConversions=*/false, PartialOverloading, - /*AllowExplicit*/true, ADLCallKind::UsesADL); + /*AllowExplicit=*/true, ADLCallKind::UsesADL); + if (CandidateSet.getRewriteInfo().shouldAddReversed( + Context, FTD->getTemplatedDecl())) { + AddTemplateOverloadCandidate( + FTD, FoundDecl, ExplicitTemplateArgs, {Args[1], Args[0]}, + CandidateSet, /*SuppressUserConversions=*/false, PartialOverloading, + /*AllowExplicit=*/true, ADLCallKind::UsesADL, + OverloadCandidateParamOrder::Reversed); + } } } } @@ -9566,17 +9583,15 @@ bool clang::isBetterOverloadCandidate( if (RC1 && RC2) { bool AtLeastAsConstrained1, AtLeastAsConstrained2; if (S.IsAtLeastAsConstrained(Cand1.Function, {RC1}, Cand2.Function, - {RC2}, AtLeastAsConstrained1)) - return false; - if (!AtLeastAsConstrained1) - return false; - if (S.IsAtLeastAsConstrained(Cand2.Function, {RC2}, Cand1.Function, + {RC2}, AtLeastAsConstrained1) || + S.IsAtLeastAsConstrained(Cand2.Function, {RC2}, Cand1.Function, {RC1}, AtLeastAsConstrained2)) return false; - if (!AtLeastAsConstrained2) - return true; - } else if (RC1 || RC2) + if (AtLeastAsConstrained1 != AtLeastAsConstrained2) + return AtLeastAsConstrained1; + } else if (RC1 || RC2) { return RC1 != nullptr; + } } } @@ -9947,9 +9962,9 @@ static bool checkAddressOfFunctionIsAvailable(Sema &S, const FunctionDecl *FD, return false; } - if (const Expr *RC = FD->getTrailingRequiresClause()) { + if (FD->getTrailingRequiresClause()) { ConstraintSatisfaction Satisfaction; - if (S.CheckConstraintSatisfaction(RC, Satisfaction)) + if (S.CheckFunctionConstraints(FD, Satisfaction, Loc)) return false; if (!Satisfaction.IsSatisfied) { if (Complain) { @@ -10974,8 +10989,7 @@ static void NoteFunctionCandidate(Sema &S, OverloadCandidate *Cand, << (unsigned)FnKindPair.first << (unsigned)ocs_non_template << FnDesc /* Ignored */; ConstraintSatisfaction Satisfaction; - if (S.CheckConstraintSatisfaction(Fn->getTrailingRequiresClause(), - Satisfaction)) + if (S.CheckFunctionConstraints(Fn, Satisfaction)) break; S.DiagnoseUnsatisfiedConstraint(Satisfaction); } diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index f961244da072..ad4ea2d2593d 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -2047,12 +2047,14 @@ struct ConvertConstructorToDeductionGuideTransform { if (const auto *TC = TTP->getTypeConstraint()) { TemplateArgumentListInfo TransformedArgs; const auto *ArgsAsWritten = TC->getTemplateArgsAsWritten(); - if (SemaRef.Subst(ArgsAsWritten->getTemplateArgs(), + if (!ArgsAsWritten || + SemaRef.Subst(ArgsAsWritten->getTemplateArgs(), ArgsAsWritten->NumTemplateArgs, TransformedArgs, Args)) SemaRef.AttachTypeConstraint( TC->getNestedNameSpecifierLoc(), TC->getConceptNameInfo(), - TC->getNamedConcept(), &TransformedArgs, NewTTP, + TC->getNamedConcept(), ArgsAsWritten ? &TransformedArgs : nullptr, + NewTTP, NewTTP->isParameterPack() ? 
cast(TC->getImmediatelyDeclaredConstraint()) ->getEllipsisLoc() diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 394c81c82794..6b865a601f9d 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -2488,7 +2488,7 @@ Sema::getTrivialTemplateArgumentLoc(const TemplateArgument &Arg, case TemplateArgument::Template: case TemplateArgument::TemplateExpansion: { NestedNameSpecifierLocBuilder Builder; - TemplateName Template = Arg.getAsTemplate(); + TemplateName Template = Arg.getAsTemplateOrTemplatePattern(); if (DependentTemplateName *DTN = Template.getAsDependentTemplateName()) Builder.MakeTrivial(Context, DTN->getQualifier(), Loc); else if (QualifiedTemplateName *QTN = @@ -2514,27 +2514,10 @@ Sema::getTrivialTemplateArgumentLoc(const TemplateArgument &Arg, } TemplateArgumentLoc -Sema::getIdentityTemplateArgumentLoc(Decl *TemplateParm, +Sema::getIdentityTemplateArgumentLoc(NamedDecl *TemplateParm, SourceLocation Location) { - if (auto *TTP = dyn_cast(TemplateParm)) - return getTrivialTemplateArgumentLoc( - TemplateArgument( - Context.getTemplateTypeParmType(TTP->getDepth(), TTP->getIndex(), - TTP->isParameterPack(), TTP)), - QualType(), Location.isValid() ? Location : TTP->getLocation()); - else if (auto *TTP = dyn_cast(TemplateParm)) - return getTrivialTemplateArgumentLoc(TemplateArgument(TemplateName(TTP)), - QualType(), - Location.isValid() ? Location : - TTP->getLocation()); - auto *NTTP = cast(TemplateParm); - CXXScopeSpec SS; - DeclarationNameInfo Info(NTTP->getDeclName(), - Location.isValid() ? Location : NTTP->getLocation()); - Expr *E = BuildDeclarationNameExpr(SS, Info, NTTP).get(); - return getTrivialTemplateArgumentLoc(TemplateArgument(E), NTTP->getType(), - Location.isValid() ? Location : - NTTP->getLocation()); + return getTrivialTemplateArgumentLoc( + Context.getInjectedTemplateArg(TemplateParm), QualType(), Location); } /// Convert the given deduced template argument and add it to the set of @@ -3456,13 +3439,16 @@ Sema::TemplateDeductionResult Sema::FinishTemplateArgumentDeduction( // ([temp.constr.decl]), those constraints are checked for satisfaction // ([temp.constr.constr]). If the constraints are not satisfied, type // deduction fails. 
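Source-level effect of the satisfaction check above: deduction itself can succeed and still end in TDK_ConstraintsNotSatisfied, which quietly removes the candidate when other overloads exist (sizeof values assume common targets):

template <typename T> requires (sizeof(T) > 1)
int pick(T) { return 1; }

int pick(...) { return 2; }

int a = pick(7);   // T = int satisfies the constraint: returns 1
int b = pick('x'); // T = char fails it; the template is dropped: returns 2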
- if (CheckInstantiatedFunctionTemplateConstraints(Info.getLocation(), - Specialization, Builder, Info.AssociatedConstraintsSatisfaction)) - return TDK_MiscellaneousDeductionFailure; + if (!PartialOverloading || + (Builder.size() == FunctionTemplate->getTemplateParameters()->size())) { + if (CheckInstantiatedFunctionTemplateConstraints(Info.getLocation(), + Specialization, Builder, Info.AssociatedConstraintsSatisfaction)) + return TDK_MiscellaneousDeductionFailure; - if (!Info.AssociatedConstraintsSatisfaction.IsSatisfied) { - Info.reset(TemplateArgumentList::CreateCopy(Context, Builder)); - return TDK_ConstraintsNotSatisfied; + if (!Info.AssociatedConstraintsSatisfaction.IsSatisfied) { + Info.reset(TemplateArgumentList::CreateCopy(Context, Builder)); + return TDK_ConstraintsNotSatisfied; + } } if (OriginalCallArgs) { diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 39bc28d62305..568f5404dc0b 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -18,6 +18,7 @@ #include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" #include "clang/AST/PrettyDeclStackTrace.h" +#include "clang/AST/TypeVisitor.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/Stack.h" #include "clang/Sema/DeclSpec.h" @@ -763,21 +764,30 @@ void Sema::PrintInstantiationStack() { case CodeSynthesisContext::ConstraintsCheck: { unsigned DiagID = 0; + if (!Active->Entity) { + Diags.Report(Active->PointOfInstantiation, + diag::note_nested_requirement_here) + << Active->InstantiationRange; + break; + } if (isa(Active->Entity)) DiagID = diag::note_concept_specialization_here; else if (isa(Active->Entity)) DiagID = diag::note_checking_constraints_for_template_id_here; else if (isa(Active->Entity)) DiagID = diag::note_checking_constraints_for_var_spec_id_here; - else { - assert(isa(Active->Entity)); + else if (isa(Active->Entity)) DiagID = diag::note_checking_constraints_for_class_spec_id_here; + else { + assert(isa(Active->Entity)); + DiagID = diag::note_checking_constraints_for_function_here; } SmallVector TemplateArgsStr; llvm::raw_svector_ostream OS(TemplateArgsStr); cast(Active->Entity)->printName(OS); - printTemplateArgumentList(OS, Active->template_arguments(), - getPrintingPolicy()); + if (!isa(Active->Entity)) + printTemplateArgumentList(OS, Active->template_arguments(), + getPrintingPolicy()); Diags.Report(Active->PointOfInstantiation, DiagID) << OS.str() << Active->InstantiationRange; break; @@ -1048,6 +1058,8 @@ namespace { NonTypeTemplateParmDecl *D); ExprResult TransformSubstNonTypeTemplateParmPackExpr( SubstNonTypeTemplateParmPackExpr *E); + ExprResult TransformSubstNonTypeTemplateParmExpr( + SubstNonTypeTemplateParmExpr *E); /// Rebuild a DeclRefExpr for a VarDecl reference. 
ExprResult RebuildVarDeclRefExpr(VarDecl *PD, SourceLocation Loc); @@ -1526,6 +1538,44 @@ TemplateInstantiator::TransformSubstNonTypeTemplateParmPackExpr( Arg); } +ExprResult +TemplateInstantiator::TransformSubstNonTypeTemplateParmExpr( + SubstNonTypeTemplateParmExpr *E) { + ExprResult SubstReplacement = TransformExpr(E->getReplacement()); + if (SubstReplacement.isInvalid()) + return true; + QualType SubstType = TransformType(E->getType()); + if (SubstType.isNull()) + return true; + // The type may have been previously dependent and not now, which means we + // might have to implicit cast the argument to the new type, for example: + // template + // concept C = sizeof(U) == 4; + // void foo() requires C<2, 'a'> { } + // When normalizing foo(), we first form the normalized constraints of C: + // AtomicExpr(sizeof(U) == 4, + // U=SubstNonTypeTemplateParmExpr(Param=U, + // Expr=DeclRef(U), + // Type=decltype(T))) + // Then we substitute T = 2, U = 'a' into the parameter mapping, and need to + // produce: + // AtomicExpr(sizeof(U) == 4, + // U=SubstNonTypeTemplateParmExpr(Param=U, + // Expr=ImpCast( + // decltype(2), + // SubstNTTPE(Param=U, Expr='a', + // Type=char)), + // Type=decltype(2))) + // The call to CheckTemplateArgument here produces the ImpCast. + TemplateArgument Converted; + if (SemaRef.CheckTemplateArgument(E->getParameter(), SubstType, + SubstReplacement.get(), + Converted).isInvalid()) + return true; + return transformNonTypeTemplateParmRef(E->getParameter(), + E->getExprLoc(), Converted); +} + ExprResult TemplateInstantiator::RebuildVarDeclRefExpr(VarDecl *PD, SourceLocation Loc) { DeclarationNameInfo NameInfo(PD->getDeclName(), Loc); @@ -2096,6 +2146,94 @@ void Sema::SubstExceptionSpec(FunctionDecl *New, const FunctionProtoType *Proto, UpdateExceptionSpec(New, ESI); } +namespace { + + struct GetContainedInventedTypeParmVisitor : + public TypeVisitor { + using TypeVisitor::Visit; + + TemplateTypeParmDecl *Visit(QualType T) { + if (T.isNull()) + return nullptr; + return Visit(T.getTypePtr()); + } + // The deduced type itself. + TemplateTypeParmDecl *VisitTemplateTypeParmType( + const TemplateTypeParmType *T) { + if (!T->getDecl()->isImplicit()) + return nullptr; + return T->getDecl(); + } + + // Only these types can contain 'auto' types, and subsequently be replaced + // by references to invented parameters. 
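A compilable form of the worked example in the TransformSubstNonTypeTemplateParmExpr comment above, assuming the template-head elided by formatting was `template <auto T, decltype(T) U>`. Substituting T = 2 fixes U's type to int, so the argument 'a' needs exactly the char-to-int implicit cast that the CheckTemplateArgument call inserts:

template <auto T, decltype(T) U>
concept C = sizeof(U) == 4;

template <int = 0> // templated so the trailing requires-clause is permitted
void foo() requires C<2, 'a'> {}

void use() { foo(); } // OK where sizeof(int) == 4: 'a' was converted to int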
+ + TemplateTypeParmDecl *VisitElaboratedType(const ElaboratedType *T) { + return Visit(T->getNamedType()); + } + + TemplateTypeParmDecl *VisitPointerType(const PointerType *T) { + return Visit(T->getPointeeType()); + } + + TemplateTypeParmDecl *VisitBlockPointerType(const BlockPointerType *T) { + return Visit(T->getPointeeType()); + } + + TemplateTypeParmDecl *VisitReferenceType(const ReferenceType *T) { + return Visit(T->getPointeeTypeAsWritten()); + } + + TemplateTypeParmDecl *VisitMemberPointerType(const MemberPointerType *T) { + return Visit(T->getPointeeType()); + } + + TemplateTypeParmDecl *VisitArrayType(const ArrayType *T) { + return Visit(T->getElementType()); + } + + TemplateTypeParmDecl *VisitDependentSizedExtVectorType( + const DependentSizedExtVectorType *T) { + return Visit(T->getElementType()); + } + + TemplateTypeParmDecl *VisitVectorType(const VectorType *T) { + return Visit(T->getElementType()); + } + + TemplateTypeParmDecl *VisitFunctionProtoType(const FunctionProtoType *T) { + return VisitFunctionType(T); + } + + TemplateTypeParmDecl *VisitFunctionType(const FunctionType *T) { + return Visit(T->getReturnType()); + } + + TemplateTypeParmDecl *VisitParenType(const ParenType *T) { + return Visit(T->getInnerType()); + } + + TemplateTypeParmDecl *VisitAttributedType(const AttributedType *T) { + return Visit(T->getModifiedType()); + } + + TemplateTypeParmDecl *VisitMacroQualifiedType(const MacroQualifiedType *T) { + return Visit(T->getUnderlyingType()); + } + + TemplateTypeParmDecl *VisitAdjustedType(const AdjustedType *T) { + return Visit(T->getOriginalType()); + } + + TemplateTypeParmDecl *VisitPackExpansionType(const PackExpansionType *T) { + return Visit(T->getPattern()); + } + }; + +} // namespace + ParmVarDecl *Sema::SubstParmVarDecl(ParmVarDecl *OldParm, const MultiLevelTemplateArgumentList &TemplateArgs, int indexAdjustment, @@ -2143,6 +2281,46 @@ ParmVarDecl *Sema::SubstParmVarDecl(ParmVarDecl *OldParm, return nullptr; } + // In abbreviated templates, TemplateTypeParmDecls with possible + // TypeConstraints are created when the parameter list is originally parsed. + // The TypeConstraints can therefore reference other functions parameters in + // the abbreviated function template, which is why we must instantiate them + // here, when the instantiated versions of those referenced parameters are in + // scope. + if (TemplateTypeParmDecl *TTP = + GetContainedInventedTypeParmVisitor().Visit(OldDI->getType())) { + if (const TypeConstraint *TC = TTP->getTypeConstraint()) { + auto *Inst = cast_or_null( + FindInstantiatedDecl(TTP->getLocation(), TTP, TemplateArgs)); + // We will first get here when instantiating the abbreviated function + // template's described function, but we might also get here later. + // Make sure we do not instantiate the TypeConstraint more than once. + if (Inst && !Inst->getTypeConstraint()) { + // TODO: Concepts: do not instantiate the constraint (delayed constraint + // substitution) + const ASTTemplateArgumentListInfo *TemplArgInfo + = TC->getTemplateArgsAsWritten(); + TemplateArgumentListInfo InstArgs; + + if (TemplArgInfo) { + InstArgs.setLAngleLoc(TemplArgInfo->LAngleLoc); + InstArgs.setRAngleLoc(TemplArgInfo->RAngleLoc); + if (Subst(TemplArgInfo->getTemplateArgs(), + TemplArgInfo->NumTemplateArgs, InstArgs, TemplateArgs)) + return nullptr; + } + if (AttachTypeConstraint( + TC->getNestedNameSpecifierLoc(), TC->getConceptNameInfo(), + TC->getNamedConcept(), &InstArgs, Inst, + TTP->isParameterPack() + ? 
cast(TC->getImmediatelyDeclaredConstraint()) + ->getEllipsisLoc() + : SourceLocation())) + return nullptr; + } + } + } + ParmVarDecl *NewParm = CheckParameter(Context.getTranslationUnitDecl(), OldParm->getInnerLocStart(), OldParm->getLocation(), diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index fbbab8f00703..37dace3bee7f 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -1837,6 +1837,23 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl( return nullptr; QualType T = adjustFunctionTypeForInstantiation(SemaRef.Context, D, TInfo); + if (TemplateParams && TemplateParams->size()) { + auto *LastParam = + dyn_cast(TemplateParams->asArray().back()); + if (LastParam && LastParam->isImplicit() && + LastParam->hasTypeConstraint()) { + // In abbreviated templates, the type-constraints of invented template + // type parameters are instantiated with the function type, invalidating + // the TemplateParameterList which relied on the template type parameter + // not having a type constraint. Recreate the TemplateParameterList with + // the updated parameter list. + TemplateParams = TemplateParameterList::Create( + SemaRef.Context, TemplateParams->getTemplateLoc(), + TemplateParams->getLAngleLoc(), TemplateParams->asArray(), + TemplateParams->getRAngleLoc(), TemplateParams->getRequiresClause()); + } + } + NestedNameSpecifierLoc QualifierLoc = D->getQualifierLoc(); if (QualifierLoc) { QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc, @@ -2177,6 +2194,23 @@ Decl *TemplateDeclInstantiator::VisitCXXMethodDecl( return nullptr; QualType T = adjustFunctionTypeForInstantiation(SemaRef.Context, D, TInfo); + if (TemplateParams && TemplateParams->size()) { + auto *LastParam = + dyn_cast(TemplateParams->asArray().back()); + if (LastParam && LastParam->isImplicit() && + LastParam->hasTypeConstraint()) { + // In abbreviated templates, the type-constraints of invented template + // type parameters are instantiated with the function type, invalidating + // the TemplateParameterList which relied on the template type parameter + // not having a type constraint. Recreate the TemplateParameterList with + // the updated parameter list. 
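For context on where such implicit, constrained parameters come from: each abbreviated declaration below is internally a template with an invented, type-constrained parameter, and that late-instantiated constraint is what forces the TemplateParameterList rebuild performed above (Sized is a placeholder concept):

template <typename> concept Sized = true; // placeholder concept

void g1(Sized auto x);       // equivalent to: template <Sized T> void g1(T x);
void g2(Sized auto *p);      // invented parameter nested behind a pointer
void g3(Sized auto (&r)[4]); // nested behind a reference to an array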
+ TemplateParams = TemplateParameterList::Create( + SemaRef.Context, TemplateParams->getTemplateLoc(), + TemplateParams->getLAngleLoc(), TemplateParams->asArray(), + TemplateParams->getRAngleLoc(), TemplateParams->getRequiresClause()); + } + } + NestedNameSpecifierLoc QualifierLoc = D->getQualifierLoc(); if (QualifierLoc) { QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc, @@ -2190,6 +2224,9 @@ Decl *TemplateDeclInstantiator::VisitCXXMethodDecl( if (TrailingRequiresClause) { EnterExpressionEvaluationContext ConstantEvaluated( SemaRef, Sema::ExpressionEvaluationContext::Unevaluated); + auto *ThisContext = dyn_cast_or_null(Owner); + Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext, + D->getMethodQualifiers(), ThisContext); ExprResult SubstRC = SemaRef.SubstExpr(TrailingRequiresClause, TemplateArgs); if (SubstRC.isInvalid()) @@ -2522,28 +2559,34 @@ Decl *TemplateDeclInstantiator::VisitTemplateTypeParmDecl( Inst->setAccess(AS_public); Inst->setImplicit(D->isImplicit()); if (auto *TC = D->getTypeConstraint()) { - // TODO: Concepts: do not instantiate the constraint (delayed constraint - // substitution) - const ASTTemplateArgumentListInfo *TemplArgInfo - = TC->getTemplateArgsAsWritten(); - TemplateArgumentListInfo InstArgs; + if (!D->isImplicit()) { + // Invented template parameter type constraints will be instantiated with + // the corresponding auto-typed parameter as it might reference other + // parameters. - if (TemplArgInfo) { - InstArgs.setLAngleLoc(TemplArgInfo->LAngleLoc); - InstArgs.setRAngleLoc(TemplArgInfo->RAngleLoc); - if (SemaRef.Subst(TemplArgInfo->getTemplateArgs(), - TemplArgInfo->NumTemplateArgs, - InstArgs, TemplateArgs)) + // TODO: Concepts: do not instantiate the constraint (delayed constraint + // substitution) + const ASTTemplateArgumentListInfo *TemplArgInfo + = TC->getTemplateArgsAsWritten(); + TemplateArgumentListInfo InstArgs; + + if (TemplArgInfo) { + InstArgs.setLAngleLoc(TemplArgInfo->LAngleLoc); + InstArgs.setRAngleLoc(TemplArgInfo->RAngleLoc); + if (SemaRef.Subst(TemplArgInfo->getTemplateArgs(), + TemplArgInfo->NumTemplateArgs, + InstArgs, TemplateArgs)) + return nullptr; + } + if (SemaRef.AttachTypeConstraint( + TC->getNestedNameSpecifierLoc(), TC->getConceptNameInfo(), + TC->getNamedConcept(), &InstArgs, Inst, + D->isParameterPack() + ? cast(TC->getImmediatelyDeclaredConstraint()) + ->getEllipsisLoc() + : SourceLocation())) return nullptr; } - if (SemaRef.AttachTypeConstraint( - TC->getNestedNameSpecifierLoc(), TC->getConceptNameInfo(), - TC->getNamedConcept(), &InstArgs, Inst, - D->isParameterPack() - ? cast(TC->getImmediatelyDeclaredConstraint()) - ->getEllipsisLoc() - : SourceLocation())) - return nullptr; } if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) { TypeSourceInfo *InstantiatedDefaultArg = @@ -4246,24 +4289,29 @@ bool Sema::CheckInstantiatedFunctionTemplateConstraints( Sema::ContextRAII savedContext(*this, Decl); LocalInstantiationScope Scope(*this); - MultiLevelTemplateArgumentList MLTAL = - getTemplateInstantiationArgs(Decl, nullptr, /*RelativeToPrimary*/true); - // If this is not an explicit specialization - we need to get the instantiated // version of the template arguments and add them to scope for the // substitution. 
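The CXXThisScopeRAII added above matters when a member function's constraints name members (implicitly through this). A minimal sketch of such a function; checking get()'s clause for Box<int> needs a valid 'this' context carrying the method's const qualifier:

template <typename T>
struct Box {
  T v;
  // 'v + v' names a non-static member inside the requires-expression, so
  // satisfaction checking must know what 'this' is (including constness).
  T get() const requires requires { v + v; } { return v; }
};

int n = Box<int>{3}.get();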
if (Decl->isTemplateInstantiation()) { InstantiatingTemplate Inst(*this, Decl->getPointOfInstantiation(), InstantiatingTemplate::ConstraintsCheck{}, Decl->getPrimaryTemplate(), - MLTAL.getInnermost(), SourceRange()); + TemplateArgs, SourceRange()); if (Inst.isInvalid()) return true; + MultiLevelTemplateArgumentList MLTAL( + *Decl->getTemplateSpecializationArgs()); if (addInstantiatedParametersToScope( *this, Decl, Decl->getPrimaryTemplate()->getTemplatedDecl(), Scope, MLTAL)) return true; } - + Qualifiers ThisQuals; + CXXRecordDecl *Record = nullptr; + if (auto *Method = dyn_cast(Decl)) { + ThisQuals = Method->getMethodQualifiers(); + Record = Method->getParent(); + } + CXXThisScopeRAII ThisScope(*this, Record, ThisQuals, Record != nullptr); return CheckConstraintSatisfaction(Template, TemplateAC, TemplateArgs, PointOfInstantiation, Satisfaction); } diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 093b69ab19d0..362b5a564ab9 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -555,7 +555,7 @@ void ASTDeclReader::Visit(Decl *D) { void ASTDeclReader::VisitDecl(Decl *D) { if (D->isTemplateParameter() || D->isTemplateParameterPack() || - isa(D)) { + isa(D) || isa(D)) { // We don't want to deserialize the DeclContext of a template // parameter or of a parameter of a function template immediately. These // entities might be used in the formulation of its DeclContext (for diff --git a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp index a4918d7179ff..002b6070ddcd 100644 --- a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp @@ -607,10 +607,17 @@ window.addEventListener("keydown", function (event) { )<<<"; } +static bool shouldDisplayPopUpRange(const SourceRange &Range) { + return !(Range.getBegin().isMacroID() || Range.getEnd().isMacroID()); +} + static void HandlePopUpPieceStartTag(Rewriter &R, const std::vector &PopUpRanges) { for (const auto &Range : PopUpRanges) { + if (!shouldDisplayPopUpRange(Range)) + continue; + html::HighlightRange(R, Range.getBegin(), Range.getEnd(), "", "", /*IsTokenRange=*/true); @@ -626,6 +633,8 @@ static void HandlePopUpPieceEndTag(Rewriter &R, llvm::raw_svector_ostream Out(Buf); SourceRange Range(Piece.getLocation().asRange()); + if (!shouldDisplayPopUpRange(Range)) + return; // Write out the path indices with a right arrow and the message as a row. Out << ""; + << ")\">←"; } os << "
" @@ -870,7 +879,7 @@ void HTMLDiagnostics::HandlePiece(Rewriter &R, FileID BugFileID, << (num - 1) << "\" title=\"Previous event (" << (num - 1) - << ")\">←
"; diff --git a/clang/tools/driver/cc1_main.cpp b/clang/tools/driver/cc1_main.cpp index b551e9f4cf82..6d1a67f2a4fa 100644 --- a/clang/tools/driver/cc1_main.cpp +++ b/clang/tools/driver/cc1_main.cpp @@ -36,6 +36,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" #include "llvm/Support/Signals.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" @@ -69,7 +70,7 @@ static void LLVMErrorHandler(void *UserData, const std::string &Message, // We cannot recover from llvm errors. When reporting a fatal error, exit // with status 70 to generate crash diagnostics. For BSD systems this is // defined as an internal software error. Otherwise, exit with status 1. - exit(GenCrashDiag ? 70 : 1); + llvm::sys::Process::Exit(GenCrashDiag ? 70 : 1); } #ifdef CLANG_HAVE_RLIMITS diff --git a/clang/tools/driver/cc1as_main.cpp b/clang/tools/driver/cc1as_main.cpp index 53c8a9d642dc..e1041f91bfd5 100644 --- a/clang/tools/driver/cc1as_main.cpp +++ b/clang/tools/driver/cc1as_main.cpp @@ -46,6 +46,7 @@ #include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" #include "llvm/Support/Signals.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" @@ -547,7 +548,7 @@ static void LLVMErrorHandler(void *UserData, const std::string &Message, Diags.Report(diag::err_fe_error_backend) << Message; // We cannot recover from llvm errors. - exit(1); + sys::Process::Exit(1); } int cc1as_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp index aa29536d8616..91584914d868 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp @@ -23,9 +23,12 @@ #include #include #include -#include #include +#if defined(__has_include) && __has_include() +#include +#endif + #if defined(__has_include) && __has_include() #include #endif // #if defined(__has_include) && __has_include() @@ -247,6 +250,8 @@ TSAN_INTERCEPTOR(void, os_lock_unlock, void *lock) { REAL(os_lock_unlock)(lock); } +#if defined(__has_include) && __has_include() + TSAN_INTERCEPTOR(void, os_unfair_lock_lock, os_unfair_lock_t lock) { if (!cur_thread()->is_inited || cur_thread()->is_dead) { return REAL(os_unfair_lock_lock)(lock); @@ -286,6 +291,8 @@ TSAN_INTERCEPTOR(void, os_unfair_lock_unlock, os_unfair_lock_t lock) { REAL(os_unfair_lock_unlock)(lock); } +#endif // #if defined(__has_include) && __has_include() + #if defined(__has_include) && __has_include() TSAN_INTERCEPTOR(void, xpc_connection_set_event_handler, diff --git a/libcxx/include/__config b/libcxx/include/__config index 8f48f16c2364..ccce227f4d6b 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -342,6 +342,10 @@ # define _LIBCPP_HAS_ALIGNED_ALLOC # define _LIBCPP_HAS_QUICK_EXIT # define _LIBCPP_HAS_C11_FEATURES +# if __FreeBSD_version >= 1300064 || \ + (__FreeBSD_version >= 1201504 && __FreeBSD_version < 1300000) +# define _LIBCPP_HAS_TIMESPEC_GET +# endif # elif defined(__BIONIC__) # define _LIBCPP_HAS_C11_FEATURES # if __ANDROID_API__ >= 21 diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index de1023346aa5..08cae59b294b 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -275,8 +275,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, case 
R_ARM_PLT32: case R_ARM_JUMP24: // Source is ARM, all PLT entries are ARM so no interworking required. - // Otherwise we need to interwork if Symbol has bit 0 set (Thumb). - if (expr == R_PC && ((s.getVA() & 1) == 1)) + // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb). + if (s.isFunc() && expr == R_PC && (s.getVA() & 1)) return true; LLVM_FALLTHROUGH; case R_ARM_CALL: { @@ -286,8 +286,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: // Source is Thumb, all PLT entries are ARM so interworking is required. - // Otherwise we need to interwork if Symbol has bit 0 clear (ARM). - if (expr == R_PLT_PC || ((s.getVA() & 1) == 0)) + // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM). + if (expr == R_PLT_PC || (s.isFunc() && (s.getVA() & 1) == 0)) return true; LLVM_FALLTHROUGH; case R_ARM_THM_CALL: { diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp index 1d4e80184dcd..b3cc78710e9a 100644 --- a/lld/ELF/Arch/PPC.cpp +++ b/lld/ELF/Arch/PPC.cpp @@ -67,6 +67,18 @@ static void writeFromHalf16(uint8_t *loc, uint32_t insn) { } void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { + // Create canonical PLT entries for non-PIE code. Compilers don't generate + // non-GOT-non-PLT relocations referencing external functions for -fpie/-fPIE. + uint32_t glink = in.plt->getVA(); // VA of .glink + if (!config->isPic) { + for (const Symbol *sym : in.plt->entries) + if (sym->needsPltAddr) { + writePPC32PltCallStub(buf, sym->getGotPltVA(), nullptr, 0); + buf += 16; + glink += 16; + } + } + // On PPC Secure PLT ABI, bl foo@plt jumps to a call stub, which loads an // absolute address from a specific .plt slot (usually called .got.plt on // other targets) and jumps there. @@ -85,15 +97,14 @@ void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { // computes the PLT index (by computing the distance from the landing b to // itself) and calls _dl_runtime_resolve() (in glibc). 
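Back in the ARM.cpp hunks above, the refinement is that bit 0 of a symbol's value only encodes Thumb state for STT_FUNC symbols; for anything else an odd address is just an odd address. A simplified model of the two interworking tests (PLT cases omitted):

#include <cstdint>

// Branch written in ARM state: a thunk is needed only when the callee is a
// Thumb function, i.e. an STT_FUNC symbol with bit 0 set.
static bool armSourceNeedsThunk(bool isFunc, uint64_t va) {
  return isFunc && (va & 1) != 0;
}

// Branch written in Thumb state: a thunk is needed when the callee is an ARM
// function (bit 0 clear); non-function symbols no longer force one.
static bool thumbSourceNeedsThunk(bool isFunc, uint64_t va) {
  return isFunc && (va & 1) == 0;
}

int main() {
  return armSourceNeedsThunk(true, 0x8001) &&
                 thumbSourceNeedsThunk(true, 0x8000)
             ? 0
             : 1;
}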
uint32_t got = in.got->getVA(); - uint32_t glink = in.plt->getVA(); // VA of .glink const uint8_t *end = buf + 64; if (config->isPic) { - uint32_t afterBcl = in.plt->getSize() - target->pltHeaderSize + 12; + uint32_t afterBcl = 4 * in.plt->getNumEntries() + 12; uint32_t gotBcl = got + 4 - (glink + afterBcl); write32(buf + 0, 0x3d6b0000 | ha(afterBcl)); // addis r11,r11,1f-glink@ha write32(buf + 4, 0x7c0802a6); // mflr r0 write32(buf + 8, 0x429f0005); // bcl 20,30,.+4 - write32(buf + 12, 0x396b0000 | lo(afterBcl)); // 1: addi r11,r11,1b-.glink@l + write32(buf + 12, 0x396b0000 | lo(afterBcl)); // 1: addi r11,r11,1b-glink@l write32(buf + 16, 0x7d8802a6); // mflr r12 write32(buf + 20, 0x7c0803a6); // mtlr r0 write32(buf + 24, 0x7d6c5850); // sub r11,r11,r12 @@ -113,16 +124,16 @@ void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { buf += 56; } else { write32(buf + 0, 0x3d800000 | ha(got + 4)); // lis r12,GOT+4@ha - write32(buf + 4, 0x3d6b0000 | ha(-glink)); // addis r11,r11,-Glink@ha + write32(buf + 4, 0x3d6b0000 | ha(-glink)); // addis r11,r11,-glink@ha if (ha(got + 4) == ha(got + 8)) write32(buf + 8, 0x800c0000 | lo(got + 4)); // lwz r0,GOT+4@l(r12) else write32(buf + 8, 0x840c0000 | lo(got + 4)); // lwzu r0,GOT+4@l(r12) - write32(buf + 12, 0x396b0000 | lo(-glink)); // addi r11,r11,-Glink@l + write32(buf + 12, 0x396b0000 | lo(-glink)); // addi r11,r11,-glink@l write32(buf + 16, 0x7c0903a6); // mtctr r0 write32(buf + 20, 0x7c0b5a14); // add r0,r11,r11 if (ha(got + 4) == ha(got + 8)) - write32(buf + 24, 0x818c0000 | lo(got + 8)); // lwz r12,GOT+8@ha(r12) + write32(buf + 24, 0x818c0000 | lo(got + 8)); // lwz r12,GOT+8@l(r12) else write32(buf + 24, 0x818c0000 | 4); // lwz r12,4(r12) write32(buf + 28, 0x7d605a14); // add r11,r0,r11 @@ -146,7 +157,7 @@ PPC::PPC() { gotBaseSymInGotPlt = false; gotHeaderEntriesNum = 3; gotPltHeaderEntriesNum = 0; - pltHeaderSize = 64; // size of PLTresolve in .glink + pltHeaderSize = 0; pltEntrySize = 4; ipltEntrySize = 16; @@ -178,25 +189,25 @@ void PPC::writeGotHeader(uint8_t *buf) const { void PPC::writeGotPlt(uint8_t *buf, const Symbol &s) const { // Address of the symbol resolver stub in .glink . 
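The ha()/lo() helpers used throughout the stub code above implement the PowerPC @ha/@lo split: the low 16 bits are consumed by a sign-extending immediate, so @ha pre-compensates by rounding up whenever bit 15 is set. A self-contained check of the identity:

#include <cassert>
#include <cstdint>

static uint16_t ha(uint32_t v) { return (v + 0x8000) >> 16; }
static uint16_t lo(uint32_t v) { return v & 0xffff; }

int main() {
  uint32_t got = 0x10018000; // bit 15 set: lo() will sign-extend negative
  uint32_t rebuilt = ((uint32_t)ha(got) << 16) + (int16_t)lo(got);
  assert(rebuilt == got); // lis r12,ha; addi r12,r12,lo reproduces the VA
}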
- write32(buf, in.plt->getVA() + 4 * s.pltIndex); + write32(buf, in.plt->getVA() + in.plt->headerSize + 4 * s.pltIndex); } bool PPC::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s, int64_t /*a*/) const { - if (type != R_PPC_REL24 && type != R_PPC_PLTREL24) + uint64_t branchAddr, const Symbol &s, int64_t a) const { + if (type != R_PPC_LOCAL24PC && type != R_PPC_REL24 && type != R_PPC_PLTREL24) return false; if (s.isInPlt()) return true; if (s.isUndefWeak()) return false; - return !(expr == R_PC && PPC::inBranchRange(type, branchAddr, s.getVA())); + return !PPC::inBranchRange(type, branchAddr, s.getVA(a)); } uint32_t PPC::getThunkSectionSpacing() const { return 0x2000000; } bool PPC::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { uint64_t offset = dst - src; - if (type == R_PPC_REL24 || type == R_PPC_PLTREL24) + if (type == R_PPC_LOCAL24PC || type == R_PPC_REL24 || type == R_PPC_PLTREL24) return isInt<26>(offset); llvm_unreachable("unsupported relocation type used in branch"); } @@ -219,13 +230,13 @@ RelExpr PPC::getRelExpr(RelType type, const Symbol &s, return R_DTPREL; case R_PPC_REL14: case R_PPC_REL32: - case R_PPC_LOCAL24PC: case R_PPC_REL16_LO: case R_PPC_REL16_HI: case R_PPC_REL16_HA: return R_PC; case R_PPC_GOT16: return R_GOT_OFF; + case R_PPC_LOCAL24PC: case R_PPC_REL24: return R_PLT_PC; case R_PPC_PLTREL24: diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index aab272f53a73..147c51ab285e 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -485,6 +485,14 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { p->r_addend = sym.getVA(addend) - section->getOutputSection()->addr; else if (config->relocatable && type != target->noneRel) sec->relocations.push_back({R_ABS, type, rel.r_offset, addend, &sym}); + } else if (config->emachine == EM_PPC && type == R_PPC_PLTREL24 && + p->r_addend >= 0x8000) { + // Similar to R_MIPS_GPREL{16,32}. If the addend of R_PPC_PLTREL24 + // indicates that r30 is relative to the input section .got2 + // (r_addend>=0x8000), after linking, r30 should be relative to the output + // section .got2 . To compensate for the shift, adjust r_addend by + // ppc32Got2OutSecOff. + p->r_addend += sec->file->ppc32Got2OutSecOff; } } } diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index ced9991f2003..93ec06610716 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1198,10 +1198,16 @@ static void processRelocAux(InputSectionBase &sec, RelExpr expr, RelType type, getLocation(sec, sym, offset)); if (!sym.isInPlt()) addPltEntry(in.plt, in.gotPlt, in.relaPlt, target->pltRel, sym); - if (!sym.isDefined()) + if (!sym.isDefined()) { replaceWithDefined( sym, in.plt, target->pltHeaderSize + target->pltEntrySize * sym.pltIndex, 0); + if (config->emachine == EM_PPC) { + // PPC32 canonical PLT entries are at the beginning of .glink + cast(sym).value = in.plt->headerSize; + in.plt->headerSize += 16; + } + } sym.needsPltAddr = true; sec.relocations.push_back({expr, type, offset, addend, &sym}); return; @@ -1298,10 +1304,10 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, if (expr == R_GOT_PC && !isAbsoluteValue(sym)) { expr = target->adjustRelaxExpr(type, relocatedAddr, expr); } else { - // Addend of R_PPC_PLTREL24 is used to choose call stub type. It should be - // ignored if optimized to R_PC. + // The 0x8000 bit of r_addend of R_PPC_PLTREL24 is used to choose call + // stub type. 
It should be ignored if optimized to R_PC. if (config->emachine == EM_PPC && expr == R_PPC32_PLTREL) - addend = 0; + addend &= ~0x8000; expr = fromPlt(expr); } } @@ -1752,6 +1758,37 @@ ThunkSection *ThunkCreator::addThunkSection(OutputSection *os, uint64_t off) { auto *ts = make(os, off); ts->partition = os->partition; + if ((config->fixCortexA53Errata843419 || config->fixCortexA8) && + !isd->sections.empty()) { + // The errata fixes are sensitive to addresses modulo 4 KiB. When we add + // thunks we disturb the base addresses of sections placed after the thunks + // this makes patches we have generated redundant, and may cause us to + // generate more patches as different instructions are now in sensitive + // locations. When we generate more patches we may force more branches to + // go out of range, causing more thunks to be generated. In pathological + // cases this can cause the address dependent content pass not to converge. + // We fix this by rounding up the size of the ThunkSection to 4KiB, this + // limits the insertion of a ThunkSection on the addresses modulo 4 KiB, + // which means that adding Thunks to the section does not invalidate + // errata patches for following code. + // Rounding up the size to 4KiB has consequences for code-size and can + // trip up linker script defined assertions. For example the linux kernel + // has an assertion that what LLD represents as an InputSectionDescription + // does not exceed 4 KiB even if the overall OutputSection is > 128 Mib. + // We use the heuristic of rounding up the size when both of the following + // conditions are true: + // 1.) The OutputSection is larger than the ThunkSectionSpacing. This + // accounts for the case where no single InputSectionDescription is + // larger than the OutputSection size. This is conservative but simple. + // 2.) The InputSectionDescription is larger than 4 KiB. This will prevent + // any assertion failures that an InputSectionDescription is < 4 KiB + // in size. + uint64_t isdSize = isd->sections.back()->outSecOff + + isd->sections.back()->getSize() - + isd->sections.front()->outSecOff; + if (os->size > target->getThunkSectionSpacing() && isdSize > 4096) + ts->roundUpSizeForErrata = true; + } isd->thunkSections.push_back({ts, pass}); return ts; } @@ -1820,9 +1857,7 @@ bool ThunkCreator::normalizeExistingThunk(Relocation &rel, uint64_t src) { rel.sym->getVA(rel.addend) + getPCBias(rel.type))) return true; rel.sym = &t->destination; - // TODO Restore addend on all targets. - if (config->emachine == EM_AARCH64 || config->emachine == EM_PPC64) - rel.addend = t->addend; + rel.addend = t->addend; if (rel.sym->isInPlt()) rel.expr = toPlt(rel.expr); } @@ -1900,16 +1935,11 @@ bool ThunkCreator::createThunks(ArrayRef outputSections) { rel.sym = t->getThunkTargetSym(); rel.expr = fromPlt(rel.expr); - // On AArch64 and PPC64, a jump/call relocation may be encoded as + // On AArch64 and PPC, a jump/call relocation may be encoded as // STT_SECTION + non-zero addend, clear the addend after // redirection. - // - // The addend of R_PPC_PLTREL24 should be ignored after changing to - // R_PC. 
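The rounding heuristic described above rests on a simple invariant: padding the thunk section to a 4 KiB multiple leaves every later address unchanged modulo 4 KiB, so previously generated Cortex-A53/A8 patches stay valid no matter how many thunks are added. A small arithmetic sketch (the 12-byte thunk size is an assumption for illustration):

#include <cassert>
#include <cstdint>

static uint64_t alignTo(uint64_t v, uint64_t a) { return (v + a - 1) & ~(a - 1); }

int main() {
  const uint64_t base = 0x12340; // address of whatever follows the thunks
  for (uint64_t thunks = 0; thunks < 1000; ++thunks) {
    uint64_t padded = alignTo(thunks * 12, 4096); // rounded ThunkSection size
    assert((base + padded) % 4096 == base % 4096); // mod-4KiB layout preserved
  }
}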
- if (config->emachine == EM_AARCH64 || - config->emachine == EM_PPC64 || - (config->emachine == EM_PPC && rel.type == R_PPC_PLTREL24)) - rel.addend = 0; + if (config->emachine != EM_MIPS) + rel.addend = -getPCBias(rel.type); } for (auto &p : isd->thunkSections) diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 550a5b38b89b..ea6eab4b47ad 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -2449,6 +2449,9 @@ PltSection::PltSection() if (config->emachine == EM_PPC || config->emachine == EM_PPC64) { name = ".glink"; alignment = 4; + // PLTresolve is at the end. + if (config->emachine == EM_PPC) + footerSize = 64; } // On x86 when IBT is enabled, this section contains the second PLT (lazy @@ -2486,7 +2489,7 @@ void PltSection::addEntry(Symbol &sym) { } size_t PltSection::getSize() const { - return headerSize + entries.size() * target->pltEntrySize; + return headerSize + entries.size() * target->pltEntrySize + footerSize; } bool PltSection::isNeeded() const { @@ -3451,19 +3454,14 @@ bool ARMExidxSyntheticSection::classof(const SectionBase *d) { } ThunkSection::ThunkSection(OutputSection *os, uint64_t off) - : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, - config->wordsize, ".text.thunk") { + : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 4, + ".text.thunk") { this->parent = os; this->outSecOff = off; } -// When the errata patching is on, we round the size up to a 4 KiB -// boundary. This limits the effect that adding Thunks has on the addresses -// of the program modulo 4 KiB. As the errata patching is sensitive to address -// modulo 4 KiB this can prevent further patches from being needed due to -// Thunk insertion. size_t ThunkSection::getSize() const { - if (config->fixCortexA53Errata843419 || config->fixCortexA8) + if (roundUpSizeForErrata) return alignTo(size, 4096); return size; } diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index f0a598dda51d..5f59178fb541 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -683,9 +683,9 @@ class PltSection : public SyntheticSection { void addEntry(Symbol &sym); size_t getNumEntries() const { return entries.size(); } - size_t headerSize = 0; + size_t headerSize; + size_t footerSize = 0; -private: std::vector entries; }; @@ -1069,6 +1069,10 @@ class ThunkSection : public SyntheticSection { InputSection *getTargetInputSection() const; bool assignOffsets(); + // When true, round up reported size of section to 4 KiB. See comment + // in addThunkSection() for more details. + bool roundUpSizeForErrata = false; + private: std::vector thunks; size_t size = 0; diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp index 7b927a434e36..f9c2e2d74e0a 100644 --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -245,8 +245,7 @@ class PPC32PltCallStub final : public Thunk { // decide the offsets in the call stub. PPC32PltCallStub(const InputSection &isec, const Relocation &rel, Symbol &dest) - : Thunk(dest, rel.type == R_PPC_PLTREL24 ? rel.addend : 0), - file(isec.file) {} + : Thunk(dest, rel.addend), file(isec.file) {} uint32_t size() override { return 16; } void writeTo(uint8_t *buf) override; void addSymbols(ThunkSection &isec) override; @@ -257,6 +256,14 @@ class PPC32PltCallStub final : public Thunk { const InputFile *file; }; +class PPC32LongThunk final : public Thunk { +public: + PPC32LongThunk(Symbol &dest, int64_t addend) : Thunk(dest, addend) {} + uint32_t size() override { return config->isPic ? 
32 : 16; } + void writeTo(uint8_t *buf) override; + void addSymbols(ThunkSection &isec) override; +}; + // PPC64 Plt call stubs. // Any call site that needs to call through a plt entry needs a call stub in // the .text section. The call stub is responsible for: @@ -765,6 +772,33 @@ bool PPC32PltCallStub::isCompatibleWith(const InputSection &isec, return !config->isPic || (isec.file == file && rel.addend == addend); } +void PPC32LongThunk::addSymbols(ThunkSection &isec) { + addSymbol(saver.save("__LongThunk_" + destination.getName()), STT_FUNC, 0, + isec); +} + +void PPC32LongThunk::writeTo(uint8_t *buf) { + auto ha = [](uint32_t v) -> uint16_t { return (v + 0x8000) >> 16; }; + auto lo = [](uint32_t v) -> uint16_t { return v; }; + uint32_t d = destination.getVA(addend); + if (config->isPic) { + uint32_t off = d - (getThunkTargetSym()->getVA() + 8); + write32(buf + 0, 0x7c0802a6); // mflr r12,0 + write32(buf + 4, 0x429f0005); // bcl r20,r31,.+4 + write32(buf + 8, 0x7d8802a6); // mtctr r12 + write32(buf + 12, 0x3d8c0000 | ha(off)); // addis r12,r12,off@ha + write32(buf + 16, 0x398c0000 | lo(off)); // addi r12,r12,off@l + write32(buf + 20, 0x7c0803a6); // mtlr r0 + buf += 24; + } else { + write32(buf + 0, 0x3d800000 | ha(d)); // lis r12,d@ha + write32(buf + 4, 0x398c0000 | lo(d)); // addi r12,r12,d@l + buf += 8; + } + write32(buf + 0, 0x7d8903a6); // mtctr r12 + write32(buf + 4, 0x4e800420); // bctr +} + void writePPC64LoadAndBranch(uint8_t *buf, int64_t offset) { uint16_t offHa = (offset + 0x8000) >> 16; uint16_t offLo = offset & 0xffff; @@ -902,9 +936,12 @@ static Thunk *addThunkMips(RelType type, Symbol &s) { static Thunk *addThunkPPC32(const InputSection &isec, const Relocation &rel, Symbol &s) { - assert((rel.type == R_PPC_REL24 || rel.type == R_PPC_PLTREL24) && + assert((rel.type == R_PPC_LOCAL24PC || rel.type == R_PPC_REL24 || + rel.type == R_PPC_PLTREL24) && "unexpected relocation type for thunk"); - return make(isec, rel, s); + if (s.isInPlt()) + return make(isec, rel, s); + return make(s, rel.addend); } static Thunk *addThunkPPC64(RelType type, Symbol &s, int64_t a) { diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index bc16417646c3..4e55f93882f1 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -30,6 +30,14 @@ ELF Improvements with GNU now. (`r375051 `_) +* New ``elf32btsmipn32_fbsd`` and ``elf32ltsmipn32_fbsd`` emulations + are supported. + +* Relax MIPS ``jalr``and ``jr`` instructions marked by the ``R_MIPS_JALR`` + relocation. + +* Reduced size of linked MIPS binaries. + COFF Improvements ----------------- @@ -38,7 +46,33 @@ COFF Improvements MinGW Improvements ------------------ -* ... 
+* Allow using custom .edata sections from input object files (for use + by Wine) + (`dadc6f248868 `) + +* Don't implicitly create import libraries unless requested + (`6540e55067e3 `) + +* Support merging multiple resource object files + (`3d3a9b3b413d `) + and properly handle the default manifest object files that GCC can pass + (`d581dd501381 `) + +* Demangle itanium symbol names in warnings/error messages + (`a66fc1c99f3e `) + +* Print source locations for undefined references and duplicate symbols, + if possible + (`1d06d48bb346 `) + and + (`b38f577c015c `) + +* Look for more filename patterns when resolving ``-l`` options + (`0226c35262df `) + +* Don't error out on duplicate absolute symbols with the same value + (which can happen for the default-null symbol for weak symbols) + (`1737cc750c46 `) MachO Improvements ------------------ diff --git a/lldb/source/DataFormatters/FormatCache.cpp b/lldb/source/DataFormatters/FormatCache.cpp index 231e7ed0c0a0..99f140705446 100644 --- a/lldb/source/DataFormatters/FormatCache.cpp +++ b/lldb/source/DataFormatters/FormatCache.cpp @@ -69,6 +69,8 @@ FormatCache::Entry &FormatCache::GetEntry(ConstString type) { return m_map[type]; } +namespace lldb_private { + template<> bool FormatCache::Entry::IsCached() { return IsFormatCached(); } @@ -79,6 +81,8 @@ template<> bool FormatCache::Entry::IsCached() { return IsSyntheticCached(); } +} // namespace lldb_private + template bool FormatCache::Get(ConstString type, ImplSP &format_impl_sp) { std::lock_guard guard(m_mutex); diff --git a/lldb/source/DataFormatters/LanguageCategory.cpp b/lldb/source/DataFormatters/LanguageCategory.cpp index e18ec0feaa8b..daf8c7af7d1a 100644 --- a/lldb/source/DataFormatters/LanguageCategory.cpp +++ b/lldb/source/DataFormatters/LanguageCategory.cpp @@ -55,6 +55,8 @@ bool LanguageCategory::Get(FormattersMatchData &match_data, return result; } +namespace lldb_private { + /// Explicit instantiations for the three types. /// \{ template bool @@ -83,6 +85,8 @@ auto &LanguageCategory::GetHardcodedFinder() { return m_hardcoded_synthetics; } +} // namespace lldb_private + template bool LanguageCategory::GetHardcoded(FormatManager &fmt_mgr, FormattersMatchData &match_data, diff --git a/lldb/source/Interpreter/CommandAlias.cpp b/lldb/source/Interpreter/CommandAlias.cpp index 5139c53a47b3..5209a7bcbc4e 100644 --- a/lldb/source/Interpreter/CommandAlias.cpp +++ b/lldb/source/Interpreter/CommandAlias.cpp @@ -65,7 +65,8 @@ static bool ProcessAliasOptionsArgs(lldb::CommandObjectSP &cmd_obj_sp, else { for (auto &entry : args.entries()) { if (!entry.ref().empty()) - option_arg_vector->emplace_back("", -1, entry.ref()); + option_arg_vector->emplace_back(std::string(""), -1, + std::string(entry.ref())); } } } diff --git a/lldb/source/Interpreter/Options.cpp b/lldb/source/Interpreter/Options.cpp index 0bceea14269d..80e9d3a6fc15 100644 --- a/lldb/source/Interpreter/Options.cpp +++ b/lldb/source/Interpreter/Options.cpp @@ -1061,8 +1061,8 @@ llvm::Expected Options::ParseAlias(const Args &args, } if (!option_arg) option_arg = ""; - option_arg_vector->emplace_back(option_str.GetString(), has_arg, - option_arg); + option_arg_vector->emplace_back(std::string(option_str.GetString()), + has_arg, std::string(option_arg)); // Find option in the argument list; also see if it was supposed to take an // argument and if one was supplied. 
Remove option (and argument, if diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 232063a6f339..6166aa77bda4 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -85,35 +85,6 @@ static bool DeclKindIsCXXClass(clang::Decl::Kind decl_kind) { return false; } -struct BitfieldInfo { - uint64_t bit_size; - uint64_t bit_offset; - - BitfieldInfo() - : bit_size(LLDB_INVALID_ADDRESS), bit_offset(LLDB_INVALID_ADDRESS) {} - - void Clear() { - bit_size = LLDB_INVALID_ADDRESS; - bit_offset = LLDB_INVALID_ADDRESS; - } - - bool IsValid() const { - return (bit_size != LLDB_INVALID_ADDRESS) && - (bit_offset != LLDB_INVALID_ADDRESS); - } - - bool NextBitfieldOffsetIsValid(const uint64_t next_bit_offset) const { - if (IsValid()) { - // This bitfield info is valid, so any subsequent bitfields must not - // overlap and must be at a higher bit offset than any previous bitfield - // + size. - return (bit_size + bit_offset) <= next_bit_offset; - } else { - // If the this BitfieldInfo is not valid, then any offset isOK - return true; - } - } -}; ClangASTImporter &DWARFASTParserClang::GetClangASTImporter() { if (!m_clang_ast_importer_up) { @@ -2419,7 +2390,7 @@ void DWARFASTParserClang::ParseSingleMember( lldb::AccessType &default_accessibility, DelayedPropertyList &delayed_properties, lldb_private::ClangASTImporter::LayoutInfo &layout_info, - BitfieldInfo &last_field_info) { + FieldInfo &last_field_info) { ModuleSP module_sp = parent_die.GetDWARF()->GetObjectFile()->GetModule(); const dw_tag_t tag = die.Tag(); // Get the parent byte size so we can verify any members will fit @@ -2453,6 +2424,14 @@ void DWARFASTParserClang::ParseSingleMember( const dw_attr_t attr = attributes.AttributeAtIndex(i); DWARFFormValue form_value; if (attributes.ExtractFormValueAtIndex(i, form_value)) { + // DW_AT_data_member_location indicates the byte offset of the + // word from the base address of the structure. + // + // DW_AT_bit_offset indicates how many bits into the word + // (according to the host endianness) the low-order bit of the + // field starts. AT_bit_offset can be negative. + // + // DW_AT_bit_size indicates the size of the field in bits. switch (attr) { case DW_AT_name: name = form_value.AsCString(); @@ -2603,36 +2582,24 @@ void DWARFASTParserClang::ParseSingleMember( Type *member_type = die.ResolveTypeUID(encoding_form.Reference()); clang::FieldDecl *field_decl = nullptr; + const uint64_t character_width = 8; + const uint64_t word_width = 32; if (tag == DW_TAG_member) { if (member_type) { + CompilerType member_clang_type = member_type->GetLayoutCompilerType(); + if (accessibility == eAccessNone) accessibility = default_accessibility; member_accessibilities.push_back(accessibility); uint64_t field_bit_offset = (member_byte_offset == UINT32_MAX ? 0 : (member_byte_offset * 8)); - if (bit_size > 0) { - BitfieldInfo this_field_info; + if (bit_size > 0) { + FieldInfo this_field_info; this_field_info.bit_offset = field_bit_offset; this_field_info.bit_size = bit_size; - ///////////////////////////////////////////////////////////// - // How to locate a field given the DWARF debug information - // - // AT_byte_size indicates the size of the word in which the bit - // offset must be interpreted. - // - // AT_data_member_location indicates the byte offset of the - // word from the base address of the structure. 
- // - // AT_bit_offset indicates how many bits into the word - // (according to the host endianness) the low-order bit of the - // field starts. AT_bit_offset can be negative. - // - // AT_bit_size indicates the size of the field in bits. - ///////////////////////////////////////////////////////////// - if (data_bit_offset != UINT64_MAX) { this_field_info.bit_offset = data_bit_offset; } else { @@ -2649,8 +2616,9 @@ void DWARFASTParserClang::ParseSingleMember( } if ((this_field_info.bit_offset >= parent_bit_size) || - !last_field_info.NextBitfieldOffsetIsValid( - this_field_info.bit_offset)) { + (last_field_info.IsBitfield() && + !last_field_info.NextBitfieldOffsetIsValid( + this_field_info.bit_offset))) { ObjectFile *objfile = die.GetDWARF()->GetObjectFile(); objfile->GetModule()->ReportWarning( "0x%8.8" PRIx64 ": %s bitfield named \"%s\" has invalid " @@ -2659,40 +2627,12 @@ void DWARFASTParserClang::ParseSingleMember( "compiler and include the preprocessed output for %s\n", die.GetID(), DW_TAG_value_to_name(tag), name, this_field_info.bit_offset, GetUnitName(parent_die).c_str()); - this_field_info.Clear(); return; } // Update the field bit offset we will report for layout field_bit_offset = this_field_info.bit_offset; - // If the member to be emitted did not start on a character - // boundary and there is empty space between the last field and - // this one, then we need to emit an anonymous member filling - // up the space up to its start. There are three cases here: - // - // 1 If the previous member ended on a character boundary, then - // we can emit an - // anonymous member starting at the most recent character - // boundary. - // - // 2 If the previous member did not end on a character boundary - // and the distance - // from the end of the previous member to the current member - // is less than a - // word width, then we can emit an anonymous member starting - // right after the - // previous member and right before this member. - // - // 3 If the previous member did not end on a character boundary - // and the distance - // from the end of the previous member to the current member - // is greater than - // or equal a word width, then we act as in Case 1. - - const uint64_t character_width = 8; - const uint64_t word_width = 32; - // Objective-C has invalid DW_AT_bit_offset values in older // versions of clang, so we have to be careful and only insert // unnamed bitfields if we have a new enough clang. 
@@ -2704,53 +2644,57 @@ void DWARFASTParserClang::ParseSingleMember( die.GetCU()->Supports_unnamed_objc_bitfields(); if (detect_unnamed_bitfields) { - BitfieldInfo anon_field_info; + clang::Optional unnamed_field_info; + uint64_t last_field_end = 0; - if ((this_field_info.bit_offset % character_width) != - 0) // not char aligned - { - uint64_t last_field_end = 0; + last_field_end = + last_field_info.bit_offset + last_field_info.bit_size; - if (last_field_info.IsValid()) - last_field_end = - last_field_info.bit_offset + last_field_info.bit_size; - - if (this_field_info.bit_offset != last_field_end) { - if (((last_field_end % character_width) == 0) || // case 1 - (this_field_info.bit_offset - last_field_end >= - word_width)) // case 3 - { - anon_field_info.bit_size = - this_field_info.bit_offset % character_width; - anon_field_info.bit_offset = - this_field_info.bit_offset - anon_field_info.bit_size; - } else // case 2 - { - anon_field_info.bit_size = - this_field_info.bit_offset - last_field_end; - anon_field_info.bit_offset = last_field_end; - } - } + if (!last_field_info.IsBitfield()) { + // The last field was not a bit-field... + // but if it did take up the entire word then we need to extend + // last_field_end so the bit-field does not step into the last + // fields padding. + if (last_field_end != 0 && ((last_field_end % word_width) != 0)) + last_field_end += word_width - (last_field_end % word_width); } - if (anon_field_info.IsValid()) { + // If we have a gap between the last_field_end and the current + // field we have an unnamed bit-field + if (this_field_info.bit_offset != last_field_end && + !(this_field_info.bit_offset < last_field_end)) { + unnamed_field_info = FieldInfo{}; + unnamed_field_info->bit_size = + this_field_info.bit_offset - last_field_end; + unnamed_field_info->bit_offset = last_field_end; + } + + if (unnamed_field_info) { clang::FieldDecl *unnamed_bitfield_decl = ClangASTContext::AddFieldToRecordType( class_clang_type, llvm::StringRef(), m_ast.GetBuiltinTypeForEncodingAndBitSize(eEncodingSint, word_width), - accessibility, anon_field_info.bit_size); + accessibility, unnamed_field_info->bit_size); layout_info.field_offsets.insert(std::make_pair( - unnamed_bitfield_decl, anon_field_info.bit_offset)); + unnamed_bitfield_decl, unnamed_field_info->bit_offset)); } } + last_field_info = this_field_info; + last_field_info.SetIsBitfield(true); } else { - last_field_info.Clear(); + last_field_info.bit_offset = field_bit_offset; + + if (llvm::Optional clang_type_size = + member_clang_type.GetByteSize(nullptr)) { + last_field_info.bit_size = *clang_type_size * character_width; + } + + last_field_info.SetIsBitfield(false); } - CompilerType member_clang_type = member_type->GetLayoutCompilerType(); if (!member_clang_type.IsCompleteType()) member_clang_type.GetCompleteType(); @@ -2885,7 +2829,7 @@ bool DWARFASTParserClang::ParseChildMembers( if (!parent_die) return false; - BitfieldInfo last_field_info; + FieldInfo last_field_info; ModuleSP module_sp = parent_die.GetDWARF()->GetObjectFile()->GetModule(); ClangASTContext *ast = diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index 4ad757247c3e..8a78299c8b10 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -170,33 +170,20 @@ class DWARFASTParserClang : public DWARFASTParser { lldb::ModuleSP GetModuleForType(const DWARFDIE &die); private: - struct BitfieldInfo 
{ - uint64_t bit_size; - uint64_t bit_offset; + struct FieldInfo { + uint64_t bit_size = 0; + uint64_t bit_offset = 0; + bool is_bitfield = false; - BitfieldInfo() - : bit_size(LLDB_INVALID_ADDRESS), bit_offset(LLDB_INVALID_ADDRESS) {} + FieldInfo() = default; - void Clear() { - bit_size = LLDB_INVALID_ADDRESS; - bit_offset = LLDB_INVALID_ADDRESS; - } - - bool IsValid() const { - return (bit_size != LLDB_INVALID_ADDRESS) && - (bit_offset != LLDB_INVALID_ADDRESS); - } + void SetIsBitfield(bool flag) { is_bitfield = flag; } + bool IsBitfield() { return is_bitfield; } bool NextBitfieldOffsetIsValid(const uint64_t next_bit_offset) const { - if (IsValid()) { - // This bitfield info is valid, so any subsequent bitfields must not - // overlap and must be at a higher bit offset than any previous bitfield - // + size. - return (bit_size + bit_offset) <= next_bit_offset; - } else { - // If the this BitfieldInfo is not valid, then any offset isOK - return true; - } + // Any subsequent bitfields must not overlap and must be at a higher + // bit offset than any previous bitfield + size. + return (bit_size + bit_offset) <= next_bit_offset; } }; @@ -208,7 +195,7 @@ class DWARFASTParserClang : public DWARFASTParser { lldb::AccessType &default_accessibility, DelayedPropertyList &delayed_properties, lldb_private::ClangASTImporter::LayoutInfo &layout_info, - BitfieldInfo &last_field_info); + FieldInfo &last_field_info); bool CompleteRecordType(const DWARFDIE &die, lldb_private::Type *type, lldb_private::CompilerType &clang_type); diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h index 9bfaaccd953e..f06d18720c3a 100644 --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -77,7 +77,8 @@ namespace llvm { static constexpr size_t strLen(const char *Str) { #if __cplusplus > 201402L return std::char_traits::length(Str); -#elif __has_builtin(__builtin_strlen) || defined(__GNUC__) || defined(_MSC_VER) +#elif __has_builtin(__builtin_strlen) || defined(__GNUC__) || \ + (defined(_MSC_VER) && _MSC_VER >= 1916) return __builtin_strlen(Str); #else const char *Begin = Str; diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index a860ce2773e1..c710c5d7055c 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -135,7 +135,6 @@ class AsmPrinter : public MachineFunctionPass { MapVector GlobalGOTEquivs; private: - MCSymbol *CurrentFnBegin = nullptr; MCSymbol *CurrentFnEnd = nullptr; MCSymbol *CurExceptionSym = nullptr; @@ -148,6 +147,8 @@ class AsmPrinter : public MachineFunctionPass { static char ID; protected: + MCSymbol *CurrentFnBegin = nullptr; + /// Protected struct HandlerInfo and Handlers permit target extended /// AsmPrinter adds their own handlers. 
struct HandlerInfo { diff --git a/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h index eb6d84e8cbb4..218afda1b546 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h @@ -13,7 +13,9 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_COMPILEUTILS_H #define LLVM_EXECUTIONENGINE_ORC_COMPILEUTILS_H +#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" +#include "llvm/ExecutionEngine/Orc/Layer.h" #include namespace llvm { @@ -28,24 +30,31 @@ namespace orc { class JITTargetMachineBuilder; +IRMaterializationUnit::ManglingOptions +irManglingOptionsFromTargetOptions(const TargetOptions &Opts); + /// Simple compile functor: Takes a single IR module and returns an ObjectFile. /// This compiler supports a single compilation thread and LLVMContext only. /// For multithreaded compilation, use ConcurrentIRCompiler below. -class SimpleCompiler { +class SimpleCompiler : public IRCompileLayer::IRCompiler { public: using CompileResult = std::unique_ptr; /// Construct a simple compile functor with the given target. SimpleCompiler(TargetMachine &TM, ObjectCache *ObjCache = nullptr) - : TM(TM), ObjCache(ObjCache) {} + : IRCompiler(irManglingOptionsFromTargetOptions(TM.Options)), TM(TM), + ObjCache(ObjCache) {} /// Set an ObjectCache to query before compiling. void setObjectCache(ObjectCache *NewCache) { ObjCache = NewCache; } /// Compile a Module to an ObjectFile. - CompileResult operator()(Module &M); + Expected operator()(Module &M) override; private: + IRMaterializationUnit::ManglingOptions + manglingOptionsForTargetMachine(const TargetMachine &TM); + CompileResult tryToLoadFromObjectCache(const Module &M); void notifyObjectCompiled(const Module &M, const MemoryBuffer &ObjBuffer); @@ -73,14 +82,14 @@ class TMOwningSimpleCompiler : public SimpleCompiler { /// /// This class creates a new TargetMachine and SimpleCompiler instance for each /// compile. -class ConcurrentIRCompiler { +class ConcurrentIRCompiler : public IRCompileLayer::IRCompiler { public: ConcurrentIRCompiler(JITTargetMachineBuilder JTMB, ObjectCache *ObjCache = nullptr); void setObjectCache(ObjectCache *ObjCache) { this->ObjCache = ObjCache; } - std::unique_ptr operator()(Module &M); + Expected> operator()(Module &M) override; private: JITTargetMachineBuilder JTMB; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index d0a9ca5c0580..ecba454887b3 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -489,13 +489,18 @@ class MaterializationResponsibility { /// is guaranteed to return Error::success() and can be wrapped with cantFail. Error notifyEmitted(); - /// Adds new symbols to the JITDylib and this responsibility instance. - /// JITDylib entries start out in the materializing state. + /// Attempt to claim responsibility for new definitions. This method can be + /// used to claim responsibility for symbols that are added to a + /// materialization unit during the compilation process (e.g. literal pool + /// symbols). Symbol linkage rules are the same as for symbols that are + /// defined up front: duplicate strong definitions will result in errors. + /// Duplicate weak definitions will be discarded (in which case they will + /// not be added to this responsibility instance). 
/// /// This method can be used by materialization units that want to add /// additional symbols at materialization time (e.g. stubs, compile /// callbacks, metadata). - Error defineMaterializing(const SymbolFlagsMap &SymbolFlags); + Error defineMaterializing(SymbolFlagsMap SymbolFlags); /// Notify all not-yet-emitted covered by this MaterializationResponsibility /// instance that an error has occurred. @@ -1023,7 +1028,7 @@ class JITDylib { const SymbolStringPtr &DependantName, MaterializingInfo &EmittedMI); - Error defineMaterializing(const SymbolFlagsMap &SymbolFlags); + Expected defineMaterializing(SymbolFlagsMap SymbolFlags); void replace(std::unique_ptr MU); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h index 52223a83ad42..bb8270fe80a3 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h @@ -29,14 +29,29 @@ namespace orc { class IRCompileLayer : public IRLayer { public: - using CompileFunction = - std::function>(Module &)>; + class IRCompiler { + public: + IRCompiler(IRMaterializationUnit::ManglingOptions MO) : MO(std::move(MO)) {} + virtual ~IRCompiler(); + const IRMaterializationUnit::ManglingOptions &getManglingOptions() const { + return MO; + } + virtual Expected> operator()(Module &M) = 0; + + protected: + IRMaterializationUnit::ManglingOptions &manglingOptions() { return MO; } + + private: + IRMaterializationUnit::ManglingOptions MO; + }; using NotifyCompiledFunction = std::function; IRCompileLayer(ExecutionSession &ES, ObjectLayer &BaseLayer, - CompileFunction Compile); + std::unique_ptr Compile); + + IRCompiler &getCompiler() { return *Compile; } void setNotifyCompiled(NotifyCompiledFunction NotifyCompiled); @@ -45,7 +60,8 @@ class IRCompileLayer : public IRLayer { private: mutable std::mutex IRLayerMutex; ObjectLayer &BaseLayer; - CompileFunction Compile; + std::unique_ptr Compile; + const IRMaterializationUnit::ManglingOptions *ManglingOpts; NotifyCompiledFunction NotifyCompiled = NotifyCompiledFunction(); }; @@ -90,7 +106,10 @@ class LegacyIRCompileLayer { /// Compile the module, and add the resulting object to the base layer /// along with the given memory manager and symbol resolver. Error addModule(VModuleKey K, std::unique_ptr M) { - if (auto Err = BaseLayer.addObject(std::move(K), Compile(*M))) + auto Obj = Compile(*M); + if (!Obj) + return Obj.takeError(); + if (auto Err = BaseLayer.addObject(std::move(K), std::move(*Obj))) return Err; if (NotifyCompiled) NotifyCompiled(std::move(K), std::move(M)); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h index c048ff3d5522..8e4760024aa8 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h @@ -124,7 +124,7 @@ class LLJIT { static std::unique_ptr createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES); - static Expected + static Expected> createCompileFunction(LLJITBuilderState &S, JITTargetMachineBuilder JTMB); /// Create an LLJIT instance with a single compile thread. 
@@ -192,7 +192,7 @@ class LLJITBuilderState { ExecutionSession &, const Triple &TT)>; using CompileFunctionCreator = - std::function( + std::function>( JITTargetMachineBuilder JTMB)>; std::unique_ptr ES; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Layer.h b/llvm/include/llvm/ExecutionEngine/Orc/Layer.h index 8f9bd704395e..95e32b2431a0 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Layer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Layer.h @@ -21,15 +21,62 @@ namespace llvm { namespace orc { +/// IRMaterializationUnit is a convenient base class for MaterializationUnits +/// wrapping LLVM IR. Represents materialization responsibility for all symbols +/// in the given module. If symbols are overridden by other definitions, then +/// their linkage is changed to available-externally. +class IRMaterializationUnit : public MaterializationUnit { +public: + struct ManglingOptions { + bool EmulatedTLS = false; + }; + + using SymbolNameToDefinitionMap = std::map; + + /// Create an IRMaterializationLayer. Scans the module to build the + /// SymbolFlags and SymbolToDefinition maps. + IRMaterializationUnit(ExecutionSession &ES, const ManglingOptions &MO, + ThreadSafeModule TSM, VModuleKey K); + + /// Create an IRMaterializationLayer from a module, and pre-existing + /// SymbolFlags and SymbolToDefinition maps. The maps must provide + /// entries for each definition in M. + /// This constructor is useful for delegating work from one + /// IRMaterializationUnit to another. + IRMaterializationUnit(ThreadSafeModule TSM, VModuleKey K, + SymbolFlagsMap SymbolFlags, + SymbolNameToDefinitionMap SymbolToDefinition); + + /// Return the ModuleIdentifier as the name for this MaterializationUnit. + StringRef getName() const override; + + const ThreadSafeModule &getModule() const { return TSM; } + +protected: + ThreadSafeModule TSM; + SymbolNameToDefinitionMap SymbolToDefinition; + +private: + void discard(const JITDylib &JD, const SymbolStringPtr &Name) override; +}; + /// Interface for layers that accept LLVM IR. class IRLayer { public: - IRLayer(ExecutionSession &ES); + IRLayer(ExecutionSession &ES, + const IRMaterializationUnit::ManglingOptions *&MO) + : ES(ES), MO(MO) {} + virtual ~IRLayer(); /// Returns the ExecutionSession for this layer. ExecutionSession &getExecutionSession() { return ES; } + /// Get the mangling options for this layer. + const IRMaterializationUnit::ManglingOptions *&getManglingOptions() const { + return MO; + } + /// Sets the CloneToNewContextOnEmit flag (false by default). /// /// When set, IR modules added to this layer will be cloned on to a new @@ -57,49 +104,15 @@ class IRLayer { private: bool CloneToNewContextOnEmit = false; ExecutionSession &ES; -}; - -/// IRMaterializationUnit is a convenient base class for MaterializationUnits -/// wrapping LLVM IR. Represents materialization responsibility for all symbols -/// in the given module. If symbols are overridden by other definitions, then -/// their linkage is changed to available-externally. -class IRMaterializationUnit : public MaterializationUnit { -public: - using SymbolNameToDefinitionMap = std::map; - - /// Create an IRMaterializationLayer. Scans the module to build the - /// SymbolFlags and SymbolToDefinition maps. - IRMaterializationUnit(ExecutionSession &ES, ThreadSafeModule TSM, - VModuleKey K); - - /// Create an IRMaterializationLayer from a module, and pre-existing - /// SymbolFlags and SymbolToDefinition maps. The maps must provide - /// entries for each definition in M. 
- /// This constructor is useful for delegating work from one - /// IRMaterializationUnit to another. - IRMaterializationUnit(ThreadSafeModule TSM, VModuleKey K, - SymbolFlagsMap SymbolFlags, - SymbolNameToDefinitionMap SymbolToDefinition); - - /// Return the ModuleIdentifier as the name for this MaterializationUnit. - StringRef getName() const override; - - const ThreadSafeModule &getModule() const { return TSM; } - -protected: - ThreadSafeModule TSM; - SymbolNameToDefinitionMap SymbolToDefinition; - -private: - void discard(const JITDylib &JD, const SymbolStringPtr &Name) override; + const IRMaterializationUnit::ManglingOptions *&MO; }; /// MaterializationUnit that materializes modules by calling the 'emit' method /// on the given IRLayer. class BasicIRLayerMaterializationUnit : public IRMaterializationUnit { public: - BasicIRLayerMaterializationUnit(IRLayer &L, VModuleKey K, - ThreadSafeModule TSM); + BasicIRLayerMaterializationUnit(IRLayer &L, const ManglingOptions &MO, + ThreadSafeModule TSM, VModuleKey K); private: diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h index f6b86bb23167..97a3dc365457 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h @@ -182,8 +182,8 @@ class IRSpeculationLayer : public IRLayer { IRSpeculationLayer(ExecutionSession &ES, IRCompileLayer &BaseLayer, Speculator &Spec, MangleAndInterner &Mangle, ResultEval Interpreter) - : IRLayer(ES), NextLayer(BaseLayer), S(Spec), Mangle(Mangle), - QueryAnalysis(Interpreter) {} + : IRLayer(ES, BaseLayer.getManglingOptions()), NextLayer(BaseLayer), + S(Spec), Mangle(Mangle), QueryAnalysis(Interpreter) {} void emit(MaterializationResponsibility R, ThreadSafeModule TSM); diff --git a/llvm/include/llvm/Support/CrashRecoveryContext.h b/llvm/include/llvm/Support/CrashRecoveryContext.h index 9522c4742244..61a1bd405a4d 100644 --- a/llvm/include/llvm/Support/CrashRecoveryContext.h +++ b/llvm/include/llvm/Support/CrashRecoveryContext.h @@ -99,7 +99,8 @@ class CrashRecoveryContext { /// Explicitly trigger a crash recovery in the current process, and /// return failure from RunSafely(). This function does not return. - void HandleCrash(); + LLVM_ATTRIBUTE_NORETURN + void HandleExit(int RetCode); /// In case of a crash, this is the crash identifier. int RetCode = 0; diff --git a/llvm/include/llvm/Support/Process.h b/llvm/include/llvm/Support/Process.h index 67e37912519b..e934b7413c17 100644 --- a/llvm/include/llvm/Support/Process.h +++ b/llvm/include/llvm/Support/Process.h @@ -201,6 +201,12 @@ class Process { /// Get the result of a process wide random number generator. The /// generator will be automatically seeded in non-deterministic fashion. static unsigned GetRandomNumber(); + + /// Equivalent to ::exit(), except when running inside a CrashRecoveryContext. + /// In that case, the control flow will resume after RunSafely(), like for a + /// crash, rather than exiting the current process. 
+ LLVM_ATTRIBUTE_NORETURN + static void Exit(int RetCode); }; } diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h index 63ff00afc2ae..ababa1d61f66 100644 --- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -62,6 +62,8 @@ class PassManagerBuilder { typedef std::function ExtensionFn; + typedef int GlobalExtensionID; + enum ExtensionPointTy { /// EP_EarlyAsPossible - This extension point allows adding passes before /// any other transformations, allowing them to see the code as it is coming @@ -193,7 +195,17 @@ class PassManagerBuilder { /// Adds an extension that will be used by all PassManagerBuilder instances. /// This is intended to be used by plugins, to register a set of /// optimisations to run automatically. - static void addGlobalExtension(ExtensionPointTy Ty, ExtensionFn Fn); + /// + /// \returns A global extension identifier that can be used to remove the + /// extension. + static GlobalExtensionID addGlobalExtension(ExtensionPointTy Ty, + ExtensionFn Fn); + /// Removes an extension that was previously added using addGlobalExtension. + /// This is also intended to be used by plugins, to remove any extension that + /// was previously registered before being unloaded. + /// + /// \param ExtensionID Identifier of the extension to be removed. + static void removeGlobalExtension(GlobalExtensionID ExtensionID); void addExtension(ExtensionPointTy Ty, ExtensionFn Fn); private: @@ -222,10 +234,20 @@ class PassManagerBuilder { /// used by optimizer plugins to allow all front ends to transparently use /// them. Create a static instance of this class in your plugin, providing a /// private function that the PassManagerBuilder can use to add your passes. -struct RegisterStandardPasses { +class RegisterStandardPasses { + PassManagerBuilder::GlobalExtensionID ExtensionID; + +public: RegisterStandardPasses(PassManagerBuilder::ExtensionPointTy Ty, PassManagerBuilder::ExtensionFn Fn) { - PassManagerBuilder::addGlobalExtension(Ty, std::move(Fn)); + ExtensionID = PassManagerBuilder::addGlobalExtension(Ty, std::move(Fn)); + } + + ~RegisterStandardPasses() { + // If the collection holding the global extensions is destroyed after the + // plugin is unloaded, the extension has to be removed here. Indeed, the + // destructor of the ExtensionFn may reference code in the plugin. + PassManagerBuilder::removeGlobalExtension(ExtensionID); } }; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 3516f4a7b370..20cd9da31fbd 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -709,15 +709,21 @@ void AsmPrinter::EmitFunctionHeader() { // Emit M NOPs for -fpatchable-function-entry=N,M where M>0. We arbitrarily // place prefix data before NOPs. 
unsigned PatchableFunctionPrefix = 0; + unsigned PatchableFunctionEntry = 0; (void)F.getFnAttribute("patchable-function-prefix") .getValueAsString() .getAsInteger(10, PatchableFunctionPrefix); + (void)F.getFnAttribute("patchable-function-entry") + .getValueAsString() + .getAsInteger(10, PatchableFunctionEntry); if (PatchableFunctionPrefix) { CurrentPatchableFunctionEntrySym = OutContext.createLinkerPrivateTempSymbol(); OutStreamer->EmitLabel(CurrentPatchableFunctionEntrySym); emitNops(PatchableFunctionPrefix); - } else { + } else if (PatchableFunctionEntry) { + // May be reassigned when emitting the body, to reference the label after + // the initial BTI (AArch64) or endbr32/endbr64 (x86). CurrentPatchableFunctionEntrySym = CurrentFnBegin; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 38011102c7b3..e97bcd62e8c7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -968,8 +968,8 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE( addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target), MachineLocation(CallReg)); } else { - DIE *CalleeDIE = getDIE(CalleeSP); - assert(CalleeDIE && "Could not find DIE for call site entry origin"); + DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP); + assert(CalleeDIE && "Could not create DIE for call site entry origin"); addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin), *CalleeDIE); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index fa6800de7955..6e643ad26410 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -540,14 +540,6 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, } } -DIE &DwarfDebug::constructSubprogramDefinitionDIE(const DISubprogram *SP) { - DICompileUnit *Unit = SP->getUnit(); - assert(SP->isDefinition() && "Subprogram not a definition"); - assert(Unit && "Subprogram definition without parent unit"); - auto &CU = getOrCreateDwarfCompileUnit(Unit); - return *CU.getOrCreateSubprogramDIE(SP); -} - /// Try to interpret values loaded into registers that forward parameters /// for \p CallMI. Store parameters with interpreted value into \p Params. static void collectCallSiteParameters(const MachineInstr *CallMI, @@ -758,17 +750,6 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, if (!CalleeDecl || !CalleeDecl->getSubprogram()) continue; CalleeSP = CalleeDecl->getSubprogram(); - - if (CalleeSP->isDefinition()) { - // Ensure that a subprogram DIE for the callee is available in the - // appropriate CU. - constructSubprogramDefinitionDIE(CalleeSP); - } else { - // Create the declaration DIE if it is missing. This is required to - // support compilation of old bitcode with an incomplete list of - // retained metadata. - CU.getOrCreateSubprogramDIE(CalleeSP); - } } // TODO: Omit call site entries for runtime calls (objc_msgSend, etc). @@ -924,6 +905,11 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); } + // Create DIEs for function declarations used for call site debug info. 
+ for (auto Scope : DIUnit->getRetainedTypes()) + if (auto *SP = dyn_cast_or_null(Scope)) + NewCU.getOrCreateSubprogramDIE(SP); + CUMap.insert({DIUnit, &NewCU}); CUDieMap.insert({&NewCU.getUnitDie(), &NewCU}); return NewCU; diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index fd82b1f98055..f90dd48458ea 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -442,9 +442,6 @@ class DwarfDebug : public DebugHandlerBase { /// Construct a DIE for this abstract scope. void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope); - /// Construct a DIE for the subprogram definition \p SP and return it. - DIE &constructSubprogramDefinitionDIE(const DISubprogram *SP); - /// Construct DIEs for call site entries describing the calls in \p MF. void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU, DIE &ScopeDIE, const MachineFunction &MF); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 1aba956c48de..53747aef77fd 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -188,9 +188,8 @@ int64_t DwarfUnit::getDefaultLowerBound() const { /// Check whether the DIE for this MDNode can be shared across CUs. bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const { - // When the MDNode can be part of the type system (this includes subprogram - // declarations *and* subprogram definitions, even local definitions), the - // DIE must be shared across CUs. + // When the MDNode can be part of the type system, the DIE can be shared + // across CUs. // Combining type units and cross-CU DIE sharing is lower value (since // cross-CU DIE sharing is used in LTO and removes type redundancy at that // level already) but may be implementable for some value in projects @@ -198,7 +197,9 @@ bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const { // together. if (isDwoUnit() && !DD->shareAcrossDWOCUs()) return false; - return (isa(D) || isa(D)) && !DD->generateTypeUnits(); + return (isa(D) || + (isa(D) && !cast(D)->isDefinition())) && + !DD->generateTypeUnits(); } DIE *DwarfUnit::getDIE(const DINode *D) const { diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 003db39fe5f9..7d77664fbf69 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -6857,12 +6857,20 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, Value *Addr = Builder.CreateBitCast( SI.getOperand(1), SplitStoreType->getPointerTo(SI.getPointerAddressSpace())); - if ((IsLE && Upper) || (!IsLE && !Upper)) + const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper); + if (IsOffsetStore) Addr = Builder.CreateGEP( SplitStoreType, Addr, ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1)); + MaybeAlign Alignment(SI.getAlignment()); + if (IsOffsetStore && Alignment) { + // When splitting the store in half, naturally one half will retain the + // alignment of the original wider store, regardless of whether it was + // over-aligned or not, while the other will require adjustment. + Alignment = commonAlignment(Alignment, HalfValBitSize / 8); + } Builder.CreateAlignedStore( - V, Addr, Upper ? SI.getAlignment() / 2 : SI.getAlignment()); + V, Addr, Alignment.hasValue() ? 
Alignment.getValue().value() : 0); }; CreateSplitStore(LValue, false); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 17eca2b0301c..96e794b15a44 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1385,7 +1385,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. - MIRBuilder.buildDirectDbgValue(0, DI.getVariable(), DI.getExpression()); + MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression()); } else if (const auto *CI = dyn_cast(V)) { MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression()); } else { diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 67d9dacda61b..3f6622723bdc 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -107,13 +107,9 @@ MachineIRBuilder::buildIndirectDbgValue(Register Reg, const MDNode *Variable, assert( cast(Variable)->isValidLocationForIntrinsic(getDL()) && "Expected inlined-at fields to agree"); - // DBG_VALUE insts now carry IR-level indirection in their DIExpression - // rather than encoding it in the instruction itself. - const DIExpression *DIExpr = cast(Expr); - DIExpr = DIExpression::append(DIExpr, {dwarf::DW_OP_deref}); return insertInstr(BuildMI(getMF(), getDL(), getTII().get(TargetOpcode::DBG_VALUE), - /*IsIndirect*/ false, Reg, Variable, DIExpr)); + /*IsIndirect*/ true, Reg, Variable, Expr)); } MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI, @@ -124,15 +120,11 @@ MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI, assert( cast(Variable)->isValidLocationForIntrinsic(getDL()) && "Expected inlined-at fields to agree"); - // DBG_VALUE insts now carry IR-level indirection in their DIExpression - // rather than encoding it in the instruction itself. 
- const DIExpression *DIExpr = cast(Expr); - DIExpr = DIExpression::append(DIExpr, {dwarf::DW_OP_deref}); return buildInstr(TargetOpcode::DBG_VALUE) .addFrameIndex(FI) - .addReg(0) + .addImm(0) .addMetadata(Variable) - .addMetadata(DIExpr); + .addMetadata(Expr); } MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, @@ -156,7 +148,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, MIB.addReg(0U); } - return MIB.addReg(0).addMetadata(Variable).addMetadata(Expr); + return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr); } MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) { diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp index 5870e20d4227..6e5593abb43e 100644 --- a/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/llvm/lib/CodeGen/GlobalMerge.cpp @@ -524,6 +524,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl &Globals, for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) { GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage(); std::string Name = Globals[k]->getName(); + GlobalValue::VisibilityTypes Visibility = Globals[k]->getVisibility(); GlobalValue::DLLStorageClassTypes DLLStorage = Globals[k]->getDLLStorageClass(); @@ -549,6 +550,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl &Globals, if (Linkage != GlobalValue::InternalLinkage || !IsMachO) { GlobalAlias *GA = GlobalAlias::create(Tys[StructIdxs[idx]], AddrSpace, Linkage, Name, GEP, &M); + GA->setVisibility(Visibility); GA->setDLLStorageClass(DLLStorage); } diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp index 2cc547a6b741..5b20a2482b7b 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -100,27 +100,28 @@ enum : unsigned { UndefLocNo = ~0U }; /// usage of the location. class DbgValueLocation { public: - DbgValueLocation(unsigned LocNo) - : LocNo(LocNo) { + DbgValueLocation(unsigned LocNo, bool WasIndirect) + : LocNo(LocNo), WasIndirect(WasIndirect) { static_assert(sizeof(*this) == sizeof(unsigned), "bad bitfield packing"); assert(locNo() == LocNo && "location truncation"); } - DbgValueLocation() : LocNo(0) {} + DbgValueLocation() : LocNo(0), WasIndirect(0) {} unsigned locNo() const { // Fix up the undef location number, which gets truncated. return LocNo == INT_MAX ? UndefLocNo : LocNo; } + bool wasIndirect() const { return WasIndirect; } bool isUndef() const { return locNo() == UndefLocNo; } DbgValueLocation changeLocNo(unsigned NewLocNo) const { - return DbgValueLocation(NewLocNo); + return DbgValueLocation(NewLocNo, WasIndirect); } friend inline bool operator==(const DbgValueLocation &LHS, const DbgValueLocation &RHS) { - return LHS.LocNo == RHS.LocNo; + return LHS.LocNo == RHS.LocNo && LHS.WasIndirect == RHS.WasIndirect; } friend inline bool operator!=(const DbgValueLocation &LHS, @@ -129,7 +130,8 @@ class DbgValueLocation { } private: - unsigned LocNo; + unsigned LocNo : 31; + unsigned WasIndirect : 1; }; /// Map of where a user value is live, and its location. @@ -166,6 +168,10 @@ class UserValue { /// Map of slot indices where this value is live. LocMap locInts; + /// Set of interval start indexes that have been trimmed to the + /// lexical scope. + SmallSet trimmedDefs; + /// Insert a DBG_VALUE into MBB at Idx for LocNo. 
void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, SlotIndex StopIdx, DbgValueLocation Loc, bool Spilled, @@ -279,8 +285,8 @@ class UserValue { void mapVirtRegs(LDVImpl *LDV); /// Add a definition point to this value. - void addDef(SlotIndex Idx, const MachineOperand &LocMO) { - DbgValueLocation Loc(getLocationNo(LocMO)); + void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect) { + DbgValueLocation Loc(getLocationNo(LocMO), IsIndirect); // Add a singular (Idx,Idx) -> Loc mapping. LocMap::iterator I = locInts.find(Idx); if (!I.valid() || I.start() != Idx) @@ -315,10 +321,11 @@ class UserValue { /// /// \param LI Scan for copies of the value in LI->reg. /// \param LocNo Location number of LI->reg. + /// \param WasIndirect Indicates if the original use of LI->reg was indirect /// \param Kills Points where the range of LocNo could be extended. /// \param [in,out] NewDefs Append (Idx, LocNo) of inserted defs here. void addDefsFromCopies( - LiveInterval *LI, unsigned LocNo, + LiveInterval *LI, unsigned LocNo, bool WasIndirect, const SmallVectorImpl &Kills, SmallVectorImpl> &NewDefs, MachineRegisterInfo &MRI, LiveIntervals &LIS); @@ -538,6 +545,8 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { OS << "undef"; else { OS << I.value().locNo(); + if (I.value().wasIndirect()) + OS << " ind"; } } for (unsigned i = 0, e = locations.size(); i != e; ++i) { @@ -646,18 +655,19 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { } // Get or create the UserValue for (variable,offset) here. - assert(!MI.getOperand(1).isImm() && "DBG_VALUE with indirect flag before " - "LiveDebugVariables"); + bool IsIndirect = MI.getOperand(1).isImm(); + if (IsIndirect) + assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset"); const DILocalVariable *Var = MI.getDebugVariable(); const DIExpression *Expr = MI.getDebugExpression(); UserValue *UV = getUserValue(Var, Expr, MI.getDebugLoc()); if (!Discard) - UV->addDef(Idx, MI.getOperand(0)); + UV->addDef(Idx, MI.getOperand(0), IsIndirect); else { MachineOperand MO = MachineOperand::CreateReg(0U, false); MO.setIsDebug(); - UV->addDef(Idx, MO); + UV->addDef(Idx, MO, false); } return true; } @@ -765,7 +775,7 @@ void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR, } void UserValue::addDefsFromCopies( - LiveInterval *LI, unsigned LocNo, + LiveInterval *LI, unsigned LocNo, bool WasIndirect, const SmallVectorImpl &Kills, SmallVectorImpl> &NewDefs, MachineRegisterInfo &MRI, LiveIntervals &LIS) { @@ -829,7 +839,7 @@ void UserValue::addDefsFromCopies( MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def); assert(CopyMI && CopyMI->isCopy() && "Bad copy value"); unsigned LocNo = getLocationNo(CopyMI->getOperand(0)); - DbgValueLocation NewLoc(LocNo); + DbgValueLocation NewLoc(LocNo, WasIndirect); I.insert(Idx, Idx.getNextSlot(), NewLoc); NewDefs.push_back(std::make_pair(Idx, NewLoc)); break; @@ -877,7 +887,8 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, // sub-register in that regclass). For now, simply skip handling copies if // a sub-register is involved. 
if (LI && !LocMO.getSubReg()) - addDefsFromCopies(LI, Loc.locNo(), Kills, Defs, MRI, LIS); + addDefsFromCopies(LI, Loc.locNo(), Loc.wasIndirect(), Kills, Defs, MRI, + LIS); continue; } @@ -910,6 +921,11 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, SlotIndex RStart = LIS.getInstructionIndex(*Range.first); SlotIndex REnd = LIS.getInstructionIndex(*Range.second); + // Variable locations at the first instruction of a block should be + // based on the block's SlotIndex, not the first instruction's index. + if (Range.first == Range.first->getParent()->begin()) + RStart = LIS.getSlotIndexes()->getIndexBefore(*Range.first); + // At the start of each iteration I has been advanced so that // I.stop() >= PrevEnd. Check for overlap. if (PrevEnd && I.start() < PrevEnd) { @@ -922,7 +938,8 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, ++I; // If the interval also overlaps the start of the "next" (i.e. - // current) range create a new interval for the remainder + // current) range create a new interval for the remainder (which + // may be further trimmed). if (RStart < IStop) I.insert(RStart, IStop, Loc); } @@ -932,6 +949,13 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, if (!I.valid()) return; + if (I.start() < RStart) { + // Interval start overlaps range - trim to the scope range. + I.setStartUnchecked(RStart); + // Remember that this interval was trimmed. + trimmedDefs.insert(RStart); + } + // The end of a lexical scope range is the last instruction in the // range. To convert to an interval we need the index of the // instruction after it. @@ -1306,14 +1330,21 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, // that the original virtual register was a pointer. Also, add the stack slot // offset for the spilled register to the expression. const DIExpression *Expr = Expression; - if (Spilled) - Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, SpillOffset); + uint8_t DIExprFlags = DIExpression::ApplyOffset; + bool IsIndirect = Loc.wasIndirect(); + if (Spilled) { + if (IsIndirect) + DIExprFlags |= DIExpression::DerefAfter; + Expr = + DIExpression::prepend(Expr, DIExprFlags, SpillOffset); + IsIndirect = true; + } assert((!Spilled || MO.isFI()) && "a spilled location must be a frame index"); do { BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), - Spilled, MO, Variable, Expr); + IsIndirect, MO, Variable, Expr); // Continue and insert DBG_VALUES after every redefinition of register // associated with the debug value within the range @@ -1345,6 +1376,12 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, bool Spilled = SpillIt != SpillOffsets.end(); unsigned SpillOffset = Spilled ? SpillIt->second : 0; + // If the interval start was trimmed to the lexical scope insert the + // DBG_VALUE at the previous index (otherwise it appears after the + // first instruction in the range). 
+ if (trimmedDefs.count(Start)) + Start = Start.getPrevIndex(); + LLVM_DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << Loc.locNo()); MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator(); SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB); diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 2bec8613e79c..8294591b7326 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1393,11 +1393,9 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { "Expected inlined-at fields to agree"); // A dbg.declare describes the address of a source variable, so lower it // into an indirect DBG_VALUE. - auto *Expr = DI->getExpression(); - Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ false, - *Op, DI->getVariable(), Expr); + TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, + *Op, DI->getVariable(), DI->getExpression()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. @@ -1421,19 +1419,19 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { if (CI->getBitWidth() > 64) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addCImm(CI) - .addReg(0U) + .addImm(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addImm(CI->getZExtValue()) - .addReg(0U) + .addImm(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); } else if (const auto *CF = dyn_cast(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addFPImm(CF) - .addReg(0U) + .addImm(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); } else if (unsigned Reg = lookUpRegForValue(V)) { diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index c613c2540628..176d71643e1a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -677,7 +677,7 @@ MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, DenseMap &VRBaseMap) { MDNode *Var = SD->getVariable(); - const DIExpression *Expr = SD->getExpression(); + MDNode *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); assert(cast(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); @@ -701,11 +701,12 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // EmitTargetCodeForFrameDebugValue is responsible for allocation. auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(SD->getFrameIx()); - if (SD->isIndirect()) - Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); - - FrameMI.addReg(0); + // Push [fi + 0] onto the DIExpression stack. + FrameMI.addImm(0); + else + // Push fi onto the DIExpression stack. + FrameMI.addReg(0); return FrameMI.addMetadata(Var).addMetadata(Expr); } // Otherwise, we're going to create an instruction here. @@ -751,9 +752,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // Indirect addressing is indicated by an Imm as the second parameter. 
if (SD->isIndirect()) - Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); - - MIB.addReg(0U, RegState::Debug); + MIB.addImm(0U); + else + MIB.addReg(0U, RegState::Debug); MIB.addMetadata(Var); MIB.addMetadata(Expr); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 974914d00d05..d809139d3807 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4716,11 +4716,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { break; case ISD::VECREDUCE_FMAX: NeutralElem = DAG.getConstantFP( - std::numeric_limits::infinity(), dl, ElemVT); + -std::numeric_limits::infinity(), dl, ElemVT); break; case ISD::VECREDUCE_FMIN: NeutralElem = DAG.getConstantFP( - -std::numeric_limits::infinity(), dl, ElemVT); + std::numeric_limits::infinity(), dl, ElemVT); break; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 728d963a916f..421ff3e7d472 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5622,6 +5622,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); + bool IsIndirect = false; Optional Op; // Some arguments' frame index is recorded during argument lowering. int FI = FuncInfo.getArgumentFrameIndex(Arg); @@ -5643,6 +5644,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } if (Reg) { Op = MachineOperand::CreateReg(Reg, false); + IsIndirect = IsDbgDeclare; } } @@ -5691,7 +5693,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?"); FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, RegAndSize.first, Variable, *FragmentExpr)); } }; @@ -5709,6 +5711,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } Op = MachineOperand::CreateReg(VMI->second, false); + IsIndirect = IsDbgDeclare; } else if (ArgRegsAndSizes.size() > 1) { // This was split due to the calling convention, and no virtual register // mapping exists for the value. @@ -5722,28 +5725,9 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); - - // If the argument arrives in a stack slot, then what the IR thought was a - // normal Value is actually in memory, and we must add a deref to load it. - if (Op->isFI()) { - int FI = Op->getIndex(); - unsigned Size = DAG.getMachineFunction().getFrameInfo().getObjectSize(FI); - if (Expr->isImplicit()) { - SmallVector Ops = {dwarf::DW_OP_deref_size, Size}; - Expr = DIExpression::prependOpcodes(Expr, Ops); - } else { - Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore); - } - } - - // If this location was specified with a dbg.declare, then it and its - // expression calculate the address of the variable. Append a deref to - // force it to be a memory location. - if (IsDbgDeclare) - Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); - + IsIndirect = (Op->isReg()) ? 
IsIndirect : true; FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, *Op, Variable, Expr)); return true; diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp index 40bc36c3030b..9d4fdc6b624c 100644 --- a/llvm/lib/CodeGen/StackColoring.cpp +++ b/llvm/lib/CodeGen/StackColoring.cpp @@ -960,7 +960,8 @@ void StackColoring::remapInstructions(DenseMap &SlotRemap) { } // Remap all instructions to the new stack slots. - std::vector> SSRefs(MFI->getObjectIndexEnd()); + std::vector> SSRefs( + MFI->getObjectIndexEnd()); for (MachineBasicBlock &BB : *MF) for (MachineInstr &I : BB) { // Skip lifetime markers. We'll remove them soon. @@ -1074,12 +1075,13 @@ void StackColoring::remapInstructions(DenseMap &SlotRemap) { } // Rewrite MachineMemOperands that reference old frame indices. - for (auto E : enumerate(SSRefs)) { - const PseudoSourceValue *NewSV = - MF->getPSVManager().getFixedStack(SlotRemap[E.index()]); - for (MachineMemOperand *Ref : E.value()) - Ref->setValue(NewSV); - } + for (auto E : enumerate(SSRefs)) + if (!E.value().empty()) { + const PseudoSourceValue *NewSV = + MF->getPSVManager().getFixedStack(SlotRemap.find(E.index())->second); + for (MachineMemOperand *Ref : E.value()) + Ref->setValue(NewSV); + } // Update the location of C++ catch objects for the MSVC personality routine. if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo()) diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp index 4522484222f5..e8b39c037693 100644 --- a/llvm/lib/CodeGen/TypePromotion.cpp +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -847,8 +847,7 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) { // Iterate through, and add to, a tree of operands and users in the use-def. while (!WorkList.empty()) { - Value *V = WorkList.back(); - WorkList.pop_back(); + Value *V = WorkList.pop_back_val(); if (CurrentVisited.count(V)) continue; @@ -917,7 +916,7 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) { ++ToPromote; } - // DAG optimisations should be able to handle these cases better, especially + // DAG optimizations should be able to handle these cases better, especially // for function arguments. 
if (ToPromote < 2 || (Blocks.size() == 1 && (NonFreeArgs > SafeWrap.size()))) return false; @@ -941,6 +940,9 @@ bool TypePromotion::runOnFunction(Function &F) { if (!TPC) return false; + AllVisited.clear(); + SafeToPromote.clear(); + SafeWrap.clear(); bool MadeChange = false; const DataLayout &DL = F.getParent()->getDataLayout(); const TargetMachine &TM = TPC->getTM(); @@ -998,6 +1000,10 @@ bool TypePromotion::runOnFunction(Function &F) { if (MadeChange) LLVM_DEBUG(dbgs() << "After TypePromotion: " << F << "\n"); + AllVisited.clear(); + SafeToPromote.clear(); + SafeWrap.clear(); + return MadeChange; } diff --git a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp index f26835ff8a08..9c504da611e0 100644 --- a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp @@ -67,9 +67,11 @@ namespace orc { class PartitioningIRMaterializationUnit : public IRMaterializationUnit { public: - PartitioningIRMaterializationUnit(ExecutionSession &ES, ThreadSafeModule TSM, - VModuleKey K, CompileOnDemandLayer &Parent) - : IRMaterializationUnit(ES, std::move(TSM), std::move(K)), + PartitioningIRMaterializationUnit(ExecutionSession &ES, + const ManglingOptions &MO, + ThreadSafeModule TSM, VModuleKey K, + CompileOnDemandLayer &Parent) + : IRMaterializationUnit(ES, MO, std::move(TSM), std::move(K)), Parent(Parent) {} PartitioningIRMaterializationUnit( @@ -111,7 +113,8 @@ CompileOnDemandLayer::compileWholeModule(GlobalValueSet Requested) { CompileOnDemandLayer::CompileOnDemandLayer( ExecutionSession &ES, IRLayer &BaseLayer, LazyCallThroughManager &LCTMgr, IndirectStubsManagerBuilder BuildIndirectStubsManager) - : IRLayer(ES), BaseLayer(BaseLayer), LCTMgr(LCTMgr), + : IRLayer(ES, BaseLayer.getManglingOptions()), BaseLayer(BaseLayer), + LCTMgr(LCTMgr), BuildIndirectStubsManager(std::move(BuildIndirectStubsManager)) {} void CompileOnDemandLayer::setPartitionFunction(PartitionFunction Partition) { @@ -136,27 +139,23 @@ void CompileOnDemandLayer::emit(MaterializationResponsibility R, TSM.withModuleDo([&](Module &M) { // First, do some cleanup on the module: cleanUpModule(M); - - MangleAndInterner Mangle(ES, M.getDataLayout()); - for (auto &GV : M.global_values()) { - if (GV.isDeclaration() || GV.hasLocalLinkage() || - GV.hasAppendingLinkage()) - continue; - - auto Name = Mangle(GV.getName()); - auto Flags = JITSymbolFlags::fromGlobalValue(GV); - if (Flags.isCallable()) - Callables[Name] = SymbolAliasMapEntry(Name, Flags); - else - NonCallables[Name] = SymbolAliasMapEntry(Name, Flags); - } }); + for (auto &KV : R.getSymbols()) { + auto &Name = KV.first; + auto &Flags = KV.second; + if (Flags.isCallable()) + Callables[Name] = SymbolAliasMapEntry(Name, Flags); + else + NonCallables[Name] = SymbolAliasMapEntry(Name, Flags); + } + // Create a partitioning materialization unit and lodge it with the // implementation dylib. 
if (auto Err = PDR.getImplDylib().define( std::make_unique( - ES, std::move(TSM), R.getVModuleKey(), *this))) { + ES, *getManglingOptions(), std::move(TSM), R.getVModuleKey(), + *this))) { ES.reportError(std::move(Err)); R.failMaterialization(); return; @@ -316,7 +315,7 @@ void CompileOnDemandLayer::emitPartition( } R.replace(std::make_unique( - ES, std::move(TSM), R.getVModuleKey(), *this)); + ES, *getManglingOptions(), std::move(TSM), R.getVModuleKey(), *this)); BaseLayer.emit(std::move(R), std::move(*ExtractedTSM)); } diff --git a/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp b/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp index f5671d90420a..160e5ba50311 100644 --- a/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp @@ -24,11 +24,20 @@ namespace llvm { namespace orc { +IRMaterializationUnit::ManglingOptions +irManglingOptionsFromTargetOptions(const TargetOptions &Opts) { + IRMaterializationUnit::ManglingOptions MO; + + MO.EmulatedTLS = Opts.EmulatedTLS; + + return MO; +} + /// Compile a Module to an ObjectFile. -SimpleCompiler::CompileResult SimpleCompiler::operator()(Module &M) { +Expected SimpleCompiler::operator()(Module &M) { CompileResult CachedObject = tryToLoadFromObjectCache(M); if (CachedObject) - return CachedObject; + return std::move(CachedObject); SmallVector ObjBufferSV; @@ -38,7 +47,8 @@ SimpleCompiler::CompileResult SimpleCompiler::operator()(Module &M) { legacy::PassManager PM; MCContext *Ctx; if (TM.addPassesToEmitMC(PM, Ctx, ObjStream)) - llvm_unreachable("Target does not support MC emission."); + return make_error("Target does not support MC emission", + inconvertibleErrorCode()); PM.run(M); } @@ -47,14 +57,11 @@ SimpleCompiler::CompileResult SimpleCompiler::operator()(Module &M) { auto Obj = object::ObjectFile::createObjectFile(ObjBuffer->getMemBufferRef()); - if (Obj) { - notifyObjectCompiled(M, *ObjBuffer); - return std::move(ObjBuffer); - } + if (!Obj) + return Obj.takeError(); - // TODO: Actually report errors helpfully. - consumeError(Obj.takeError()); - return nullptr; + notifyObjectCompiled(M, *ObjBuffer); + return std::move(ObjBuffer); } SimpleCompiler::CompileResult @@ -73,9 +80,11 @@ void SimpleCompiler::notifyObjectCompiled(const Module &M, ConcurrentIRCompiler::ConcurrentIRCompiler(JITTargetMachineBuilder JTMB, ObjectCache *ObjCache) - : JTMB(std::move(JTMB)), ObjCache(ObjCache) {} + : IRCompiler(irManglingOptionsFromTargetOptions(JTMB.getOptions())), + JTMB(std::move(JTMB)), ObjCache(ObjCache) {} -std::unique_ptr ConcurrentIRCompiler::operator()(Module &M) { +Expected> +ConcurrentIRCompiler::operator()(Module &M) { auto TM = cantFail(JTMB.createTargetMachine()); SimpleCompiler C(*TM, ObjCache); return C(M); diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 63ef889dae46..ec706cf63d35 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -468,15 +468,19 @@ Error MaterializationResponsibility::notifyEmitted() { } Error MaterializationResponsibility::defineMaterializing( - const SymbolFlagsMap &NewSymbolFlags) { - // Add the given symbols to this responsibility object. - // It's ok if we hit a duplicate here: In that case the new version will be - // discarded, and the JITDylib::defineMaterializing method will return a - // duplicate symbol error. 
- for (auto &KV : NewSymbolFlags) - SymbolFlags.insert(KV); + SymbolFlagsMap NewSymbolFlags) { - return JD.defineMaterializing(NewSymbolFlags); + LLVM_DEBUG({ + dbgs() << "In " << JD.getName() << " defining materializing symbols " + << NewSymbolFlags << "\n"; + }); + if (auto AcceptedDefs = JD.defineMaterializing(std::move(NewSymbolFlags))) { + // Add all newly accepted symbols to this responsibility object. + for (auto &KV : *AcceptedDefs) + SymbolFlags.insert(KV); + return Error::success(); + } else + return AcceptedDefs.takeError(); } void MaterializationResponsibility::failMaterialization() { @@ -809,31 +813,52 @@ void JITDylib::removeGenerator(DefinitionGenerator &G) { }); } -Error JITDylib::defineMaterializing(const SymbolFlagsMap &SymbolFlags) { - return ES.runSessionLocked([&]() -> Error { +Expected +JITDylib::defineMaterializing(SymbolFlagsMap SymbolFlags) { + + return ES.runSessionLocked([&]() -> Expected { std::vector AddedSyms; + std::vector RejectedWeakDefs; - for (auto &KV : SymbolFlags) { - SymbolTable::iterator EntryItr; - bool Added; + for (auto SFItr = SymbolFlags.begin(), SFEnd = SymbolFlags.end(); + SFItr != SFEnd; ++SFItr) { - std::tie(EntryItr, Added) = - Symbols.insert(std::make_pair(KV.first, SymbolTableEntry(KV.second))); + auto &Name = SFItr->first; + auto &Flags = SFItr->second; - if (Added) { - AddedSyms.push_back(EntryItr); - EntryItr->second.setState(SymbolState::Materializing); - } else { - // Remove any symbols already added. - for (auto &SI : AddedSyms) - Symbols.erase(SI); + auto EntryItr = Symbols.find(Name); - // FIXME: Return all duplicates. - return make_error(*KV.first); - } + // If the entry already exists... + if (EntryItr != Symbols.end()) { + + // If this is a strong definition then error out. + if (!Flags.isWeak()) { + // Remove any symbols already added. + for (auto &SI : AddedSyms) + Symbols.erase(SI); + + // FIXME: Return all duplicates. + return make_error(*Name); + } + + // Otherwise just make a note to discard this symbol after the loop. + RejectedWeakDefs.push_back(SFItr); + continue; + } else + EntryItr = + Symbols.insert(std::make_pair(Name, SymbolTableEntry(Flags))).first; + + AddedSyms.push_back(EntryItr); + EntryItr->second.setState(SymbolState::Materializing); } - return Error::success(); + // Remove any rejected weak definitions from the SymbolFlags map. 
+ while (!RejectedWeakDefs.empty()) { + SymbolFlags.erase(RejectedWeakDefs.back()); + RejectedWeakDefs.pop_back(); + } + + return SymbolFlags; }); } diff --git a/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp b/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp index d311f34179c7..023940dc8298 100644 --- a/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp @@ -11,9 +11,14 @@ namespace llvm { namespace orc { +IRCompileLayer::IRCompiler::~IRCompiler() {} + IRCompileLayer::IRCompileLayer(ExecutionSession &ES, ObjectLayer &BaseLayer, - CompileFunction Compile) - : IRLayer(ES), BaseLayer(BaseLayer), Compile(std::move(Compile)) {} + std::unique_ptr Compile) + : IRLayer(ES, ManglingOpts), BaseLayer(BaseLayer), + Compile(std::move(Compile)) { + ManglingOpts = &this->Compile->getManglingOptions(); +} void IRCompileLayer::setNotifyCompiled(NotifyCompiledFunction NotifyCompiled) { std::lock_guard Lock(IRLayerMutex); @@ -24,7 +29,7 @@ void IRCompileLayer::emit(MaterializationResponsibility R, ThreadSafeModule TSM) { assert(TSM && "Module must not be null"); - if (auto Obj = TSM.withModuleDo(Compile)) { + if (auto Obj = TSM.withModuleDo(*Compile)) { { std::lock_guard Lock(IRLayerMutex); if (NotifyCompiled) diff --git a/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp b/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp index 845ecc71eb87..511248f83b25 100644 --- a/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp @@ -12,10 +12,10 @@ namespace llvm { namespace orc { -IRTransformLayer::IRTransformLayer(ExecutionSession &ES, - IRLayer &BaseLayer, - TransformFunction Transform) - : IRLayer(ES), BaseLayer(BaseLayer), Transform(std::move(Transform)) {} +IRTransformLayer::IRTransformLayer(ExecutionSession &ES, IRLayer &BaseLayer, + TransformFunction Transform) + : IRLayer(ES, BaseLayer.getManglingOptions()), BaseLayer(BaseLayer), + Transform(std::move(Transform)) {} void IRTransformLayer::emit(MaterializationResponsibility R, ThreadSafeModule TSM) { diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp index 54473ab46423..6189056b3d9f 100644 --- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp @@ -96,8 +96,10 @@ LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) { auto ObjLinkingLayer = std::make_unique(ES, std::move(GetMemMgr)); - if (S.JTMB->getTargetTriple().isOSBinFormatCOFF()) + if (S.JTMB->getTargetTriple().isOSBinFormatCOFF()) { ObjLinkingLayer->setOverrideObjectFlagsWithResponsibilityFlags(true); + ObjLinkingLayer->setAutoClaimResponsibilityForObjectSymbols(true); + } // FIXME: Explicit conversion to std::unique_ptr added to silence // errors from some GCC / libstdc++ bots. Remove this conversion (i.e. @@ -105,7 +107,7 @@ LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) { return std::unique_ptr(std::move(ObjLinkingLayer)); } -Expected +Expected> LLJIT::createCompileFunction(LLJITBuilderState &S, JITTargetMachineBuilder JTMB) { @@ -116,13 +118,13 @@ LLJIT::createCompileFunction(LLJITBuilderState &S, // Otherwise default to creating a SimpleCompiler, or ConcurrentIRCompiler, // depending on the number of threads requested. 
if (S.NumCompileThreads > 0) - return ConcurrentIRCompiler(std::move(JTMB)); + return std::make_unique(std::move(JTMB)); auto TM = JTMB.createTargetMachine(); if (!TM) return TM.takeError(); - return TMOwningSimpleCompiler(std::move(*TM)); + return std::make_unique(std::move(*TM)); } LLJIT::LLJIT(LLJITBuilderState &S, Error &Err) diff --git a/llvm/lib/ExecutionEngine/Orc/Layer.cpp b/llvm/lib/ExecutionEngine/Orc/Layer.cpp index 580e2682ec8c..ebc7801f11ff 100644 --- a/llvm/lib/ExecutionEngine/Orc/Layer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Layer.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/Orc/Layer.h" +#include "llvm/IR/Constants.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Debug.h" @@ -15,15 +16,15 @@ namespace llvm { namespace orc { -IRLayer::IRLayer(ExecutionSession &ES) : ES(ES) {} IRLayer::~IRLayer() {} Error IRLayer::add(JITDylib &JD, ThreadSafeModule TSM, VModuleKey K) { return JD.define(std::make_unique( - *this, std::move(K), std::move(TSM))); + *this, *getManglingOptions(), std::move(TSM), std::move(K))); } IRMaterializationUnit::IRMaterializationUnit(ExecutionSession &ES, + const ManglingOptions &MO, ThreadSafeModule TSM, VModuleKey K) : MaterializationUnit(SymbolFlagsMap(), std::move(K)), TSM(std::move(TSM)) { @@ -32,12 +33,44 @@ IRMaterializationUnit::IRMaterializationUnit(ExecutionSession &ES, MangleAndInterner Mangle(ES, this->TSM.getModuleUnlocked()->getDataLayout()); this->TSM.withModuleDo([&](Module &M) { for (auto &G : M.global_values()) { - if (G.hasName() && !G.isDeclaration() && !G.hasLocalLinkage() && - !G.hasAvailableExternallyLinkage() && !G.hasAppendingLinkage()) { - auto MangledName = Mangle(G.getName()); - SymbolFlags[MangledName] = JITSymbolFlags::fromGlobalValue(G); - SymbolToDefinition[MangledName] = &G; + // Skip globals that don't generate symbols. + if (!G.hasName() || G.isDeclaration() || G.hasLocalLinkage() || + G.hasAvailableExternallyLinkage() || G.hasAppendingLinkage()) + continue; + + // thread locals generate different symbols depending on whether or not + // emulated TLS is enabled. + if (G.isThreadLocal() && MO.EmulatedTLS) { + auto &GV = cast(G); + + auto Flags = JITSymbolFlags::fromGlobalValue(GV); + + auto EmuTLSV = Mangle(("__emutls_v." + GV.getName()).str()); + SymbolFlags[EmuTLSV] = Flags; + SymbolToDefinition[EmuTLSV] = &GV; + + // If this GV has a non-zero initializer we'll need to emit an + // __emutls.t symbol too. + if (GV.hasInitializer()) { + const auto *InitVal = GV.getInitializer(); + + // Skip zero-initializers. + if (isa(InitVal)) + continue; + const auto *InitIntValue = dyn_cast(InitVal); + if (InitIntValue && InitIntValue->isZero()) + continue; + + auto EmuTLST = Mangle(("__emutls_t." + GV.getName()).str()); + SymbolFlags[EmuTLST] = Flags; + } + continue; } + + // Otherwise we just need a normal linker mangling. 
+ auto MangledName = Mangle(G.getName()); + SymbolFlags[MangledName] = JITSymbolFlags::fromGlobalValue(G); + SymbolToDefinition[MangledName] = &G; } }); } @@ -72,8 +105,8 @@ void IRMaterializationUnit::discard(const JITDylib &JD, } BasicIRLayerMaterializationUnit::BasicIRLayerMaterializationUnit( - IRLayer &L, VModuleKey K, ThreadSafeModule TSM) - : IRMaterializationUnit(L.getExecutionSession(), std::move(TSM), + IRLayer &L, const ManglingOptions &MO, ThreadSafeModule TSM, VModuleKey K) + : IRMaterializationUnit(L.getExecutionSession(), MO, std::move(TSM), std::move(K)), L(L), K(std::move(K)) {} diff --git a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp index a92264c0be14..ff8289a264c8 100644 --- a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" +#include "llvm/Object/COFF.h" namespace { @@ -160,6 +161,39 @@ Error RTDyldObjectLinkingLayer::onObjLoad( std::set &InternalSymbols) { SymbolFlagsMap ExtraSymbolsToClaim; SymbolMap Symbols; + + // Hack to support COFF constant pool comdats introduced during compilation: + // (See http://llvm.org/PR40074) + if (auto *COFFObj = dyn_cast(&Obj)) { + auto &ES = getExecutionSession(); + + // For all resolved symbols that are not already in the responsibilty set: + // check whether the symbol is in a comdat section and if so mark it as + // weak. + for (auto &Sym : COFFObj->symbols()) { + if (Sym.getFlags() & object::BasicSymbolRef::SF_Undefined) + continue; + auto Name = Sym.getName(); + if (!Name) + return Name.takeError(); + auto I = Resolved.find(*Name); + + // Skip unresolved symbols, internal symbols, and symbols that are + // already in the responsibility set. + if (I == Resolved.end() || InternalSymbols.count(*Name) || + R.getSymbols().count(ES.intern(*Name))) + continue; + auto Sec = Sym.getSection(); + if (!Sec) + return Sec.takeError(); + if (*Sec == COFFObj->section_end()) + continue; + auto &COFFSec = *COFFObj->getCOFFSection(**Sec); + if (COFFSec.Characteristics & COFF::IMAGE_SCN_LNK_COMDAT) + I->second.setFlags(I->second.getFlags() | JITSymbolFlags::Weak); + } + } + for (auto &KV : Resolved) { // Scan the symbols and add them to the Symbols map for resolution. @@ -184,10 +218,17 @@ Error RTDyldObjectLinkingLayer::onObjLoad( Symbols[InternedName] = JITEvaluatedSymbol(KV.second.getAddress(), Flags); } - if (!ExtraSymbolsToClaim.empty()) + if (!ExtraSymbolsToClaim.empty()) { if (auto Err = R.defineMaterializing(ExtraSymbolsToClaim)) return Err; + // If we claimed responsibility for any weak symbols but were rejected then + // we need to remove them from the resolved set. + for (auto &KV : ExtraSymbolsToClaim) + if (KV.second.isWeak() && !R.getSymbols().count(KV.first)) + Symbols.erase(KV.first); + } + if (auto Err = R.notifyResolved(Symbols)) { R.failMaterialization(); return Err; diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index acf0e4afef27..1f978d136049 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -2651,8 +2651,10 @@ void AssemblyWriter::printModule(const Module *M) { printUseLists(nullptr); // Output all of the functions. 
- for (const Function &F : *M) + for (const Function &F : *M) { + Out << '\n'; printFunction(&F); + } assert(UseListOrders.empty() && "All use-lists should have been consumed"); // Output all attribute groups. diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp index e13656ed1c10..af934cc8b9be 100644 --- a/llvm/lib/Linker/IRMover.cpp +++ b/llvm/lib/Linker/IRMover.cpp @@ -1277,11 +1277,17 @@ Error IRLinker::linkModuleFlagsMetadata() { } // Diagnose inconsistent merge behavior types. - if (SrcBehaviorValue != DstBehaviorValue) - return stringErr("linking module flags '" + ID->getString() + - "': IDs have conflicting behaviors in '" + - SrcM->getModuleIdentifier() + "' and '" + - DstM.getModuleIdentifier() + "'"); + if (SrcBehaviorValue != DstBehaviorValue) { + bool MaxAndWarn = (SrcBehaviorValue == Module::Max && + DstBehaviorValue == Module::Warning) || + (DstBehaviorValue == Module::Max && + SrcBehaviorValue == Module::Warning); + if (!MaxAndWarn) + return stringErr("linking module flags '" + ID->getString() + + "': IDs have conflicting behaviors in '" + + SrcM->getModuleIdentifier() + "' and '" + + DstM.getModuleIdentifier() + "'"); + } auto replaceDstValue = [&](MDNode *New) { Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New}; @@ -1290,6 +1296,40 @@ Error IRLinker::linkModuleFlagsMetadata() { Flags[ID].first = Flag; }; + // Emit a warning if the values differ and either source or destination + // request Warning behavior. + if ((DstBehaviorValue == Module::Warning || + SrcBehaviorValue == Module::Warning) && + SrcOp->getOperand(2) != DstOp->getOperand(2)) { + std::string Str; + raw_string_ostream(Str) + << "linking module flags '" << ID->getString() + << "': IDs have conflicting values ('" << *SrcOp->getOperand(2) + << "' from " << SrcM->getModuleIdentifier() << " with '" + << *DstOp->getOperand(2) << "' from " << DstM.getModuleIdentifier() + << ')'; + emitWarning(Str); + } + + // Choose the maximum if either source or destination request Max behavior. + if (DstBehaviorValue == Module::Max || SrcBehaviorValue == Module::Max) { + ConstantInt *DstValue = + mdconst::extract(DstOp->getOperand(2)); + ConstantInt *SrcValue = + mdconst::extract(SrcOp->getOperand(2)); + + // The resulting flag should have a Max behavior, and contain the maximum + // value from between the source and destination values. + Metadata *FlagOps[] = { + (DstBehaviorValue != Module::Max ? SrcOp : DstOp)->getOperand(0), ID, + (SrcValue->getZExtValue() > DstValue->getZExtValue() ? SrcOp : DstOp) + ->getOperand(2)}; + MDNode *Flag = MDNode::get(DstM.getContext(), FlagOps); + DstModFlags->setOperand(DstIndex, Flag); + Flags[ID].first = Flag; + continue; + } + // Perform the merge for standard behavior types. switch (SrcBehaviorValue) { case Module::Require: @@ -1305,26 +1345,9 @@ Error IRLinker::linkModuleFlagsMetadata() { continue; } case Module::Warning: { - // Emit a warning if the values differ. 
- if (SrcOp->getOperand(2) != DstOp->getOperand(2)) { - std::string str; - raw_string_ostream(str) - << "linking module flags '" << ID->getString() - << "': IDs have conflicting values ('" << *SrcOp->getOperand(2) - << "' from " << SrcM->getModuleIdentifier() << " with '" - << *DstOp->getOperand(2) << "' from " << DstM.getModuleIdentifier() - << ')'; - emitWarning(str); - } - continue; + break; } case Module::Max: { - ConstantInt *DstValue = - mdconst::extract(DstOp->getOperand(2)); - ConstantInt *SrcValue = - mdconst::extract(SrcOp->getOperand(2)); - if (SrcValue->getZExtValue() > DstValue->getZExtValue()) - overrideDstValue(); break; } case Module::Append: { @@ -1350,6 +1373,7 @@ Error IRLinker::linkModuleFlagsMetadata() { break; } } + } // Check all of the requirements. diff --git a/llvm/lib/Support/CRC.cpp b/llvm/lib/Support/CRC.cpp index a3dba1a3aa10..2bc668beed32 100644 --- a/llvm/lib/Support/CRC.cpp +++ b/llvm/lib/Support/CRC.cpp @@ -85,7 +85,15 @@ uint32_t llvm::crc32(uint32_t CRC, ArrayRef Data) { #include uint32_t llvm::crc32(uint32_t CRC, ArrayRef Data) { - return ::crc32(CRC, (const Bytef *)Data.data(), Data.size()); + // Zlib's crc32() only takes a 32-bit length, so we have to iterate for larger + // sizes. One could use crc32_z() instead, but that's a recent (2017) addition + // and may not be available on all systems. + do { + ArrayRef Slice = Data.take_front(UINT32_MAX); + CRC = ::crc32(CRC, (const Bytef *)Slice.data(), (uInt)Slice.size()); + Data = Data.drop_front(Slice.size()); + } while (Data.size() > 0); + return CRC; } #endif diff --git a/llvm/lib/Support/CrashRecoveryContext.cpp b/llvm/lib/Support/CrashRecoveryContext.cpp index b9031f52375c..356835609830 100644 --- a/llvm/lib/Support/CrashRecoveryContext.cpp +++ b/llvm/lib/Support/CrashRecoveryContext.cpp @@ -14,9 +14,6 @@ #include "llvm/Support/ThreadLocal.h" #include #include -#ifdef _WIN32 -#include // for GetExceptionInformation -#endif #if LLVM_ON_UNIX #include // EX_IOERR #endif @@ -41,11 +38,11 @@ struct CrashRecoveryContextImpl { ::jmp_buf JumpBuffer; volatile unsigned Failed : 1; unsigned SwitchedThread : 1; + unsigned ValidJumpBuffer : 1; public: - CrashRecoveryContextImpl(CrashRecoveryContext *CRC) : CRC(CRC), - Failed(false), - SwitchedThread(false) { + CrashRecoveryContextImpl(CrashRecoveryContext *CRC) noexcept + : CRC(CRC), Failed(false), SwitchedThread(false), ValidJumpBuffer(false) { Next = CurrentContext->get(); CurrentContext->set(this); } @@ -80,10 +77,13 @@ struct CrashRecoveryContextImpl { CRC->RetCode = RetCode; // Jump back to the RunSafely we were called under. - longjmp(JumpBuffer, 1); + if (ValidJumpBuffer) + longjmp(JumpBuffer, 1); + + // Otherwise let the caller decide of the outcome of the crash. Currently + // this occurs when using SEH on Windows with MSVC or clang-cl. } }; - } static ManagedStatic gCrashRecoveryContextMutex; @@ -175,6 +175,9 @@ CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) { } #if defined(_MSC_VER) + +#include // for GetExceptionInformation + // If _MSC_VER is defined, we must have SEH. Use it if it's available. It's way // better than VEH. 
Vectored exception handling catches all exceptions happening // on the thread with installed exception handlers, so it can interfere with @@ -188,30 +191,45 @@ static void uninstallExceptionOrSignalHandlers() {} // We need this function because the call to GetExceptionInformation() can only // occur inside the __except evaluation block -static int ExceptionFilter(bool DumpStackAndCleanup, - _EXCEPTION_POINTERS *Except) { - if (DumpStackAndCleanup) - sys::CleanupOnSignal((uintptr_t)Except); +static int ExceptionFilter(_EXCEPTION_POINTERS *Except) { + // Lookup the current thread local recovery object. + const CrashRecoveryContextImpl *CRCI = CurrentContext->get(); + + if (!CRCI) { + // Something has gone horribly wrong, so let's just tell everyone + // to keep searching + CrashRecoveryContext::Disable(); + return EXCEPTION_CONTINUE_SEARCH; + } + + int RetCode = (int)Except->ExceptionRecord->ExceptionCode; + if ((RetCode & 0xF0000000) == 0xE0000000) + RetCode &= ~0xF0000000; // this crash was generated by sys::Process::Exit + + // Handle the crash + const_cast(CRCI)->HandleCrash( + RetCode, reinterpret_cast(Except)); + return EXCEPTION_EXECUTE_HANDLER; } -static bool InvokeFunctionCall(function_ref Fn, - bool DumpStackAndCleanup, int &RetCode) { - __try { - Fn(); - } __except (ExceptionFilter(DumpStackAndCleanup, GetExceptionInformation())) { - RetCode = GetExceptionCode(); - return false; - } - return true; -} - +#if defined(__clang__) && defined(_M_IX86) +// Work around PR44697. +__attribute__((optnone)) +#endif bool CrashRecoveryContext::RunSafely(function_ref Fn) { if (!gCrashRecoveryEnabled) { Fn(); return true; } - return InvokeFunctionCall(Fn, DumpStackAndCleanupOnFailure, RetCode); + assert(!Impl && "Crash recovery context already initialized!"); + Impl = new CrashRecoveryContextImpl(this); + __try { + Fn(); + } __except (ExceptionFilter(GetExceptionInformation())) { + return false; + } + return true; } #else // !_MSC_VER @@ -264,10 +282,13 @@ static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) // TODO: We can capture the stack backtrace here and store it on the // implementation if we so choose. + int RetCode = (int)ExceptionInfo->ExceptionRecord->ExceptionCode; + if ((RetCode & 0xF0000000) == 0xE0000000) + RetCode &= ~0xF0000000; // this crash was generated by sys::Process::Exit + // Handle the crash const_cast(CRCI)->HandleCrash( - (int)ExceptionInfo->ExceptionRecord->ExceptionCode, - reinterpret_cast(ExceptionInfo)); + RetCode, reinterpret_cast(ExceptionInfo)); // Note that we don't actually get here because HandleCrash calls // longjmp, which means the HandleCrash function never returns. @@ -388,6 +409,7 @@ bool CrashRecoveryContext::RunSafely(function_ref Fn) { CrashRecoveryContextImpl *CRCI = new CrashRecoveryContextImpl(this); Impl = CRCI; + CRCI->ValidJumpBuffer = true; if (setjmp(CRCI->JumpBuffer) != 0) { return false; } @@ -399,12 +421,19 @@ bool CrashRecoveryContext::RunSafely(function_ref Fn) { #endif // !_MSC_VER -void CrashRecoveryContext::HandleCrash() { - CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl; +LLVM_ATTRIBUTE_NORETURN +void CrashRecoveryContext::HandleExit(int RetCode) { +#if defined(_WIN32) + // SEH and VEH + ::RaiseException(0xE0000000 | RetCode, 0, 0, NULL); +#else + // On Unix we don't need to raise an exception, we go directly to + // HandleCrash(), then longjmp will unwind the stack for us. 
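As a minimal usage sketch of the exit path described above (assuming the embedder has enabled crash recovery; runToolSafely is a hypothetical caller, not part of the patch):

#include "llvm/Support/CrashRecoveryContext.h"
#include "llvm/Support/Process.h"

bool runToolSafely() {
  llvm::CrashRecoveryContext::Enable();
  llvm::CrashRecoveryContext CRC;
  // With this patch, sys::Process::Exit() notices the active context and
  // calls HandleExit(): on Windows that raises an exception tagged with
  // 0xE0000000, on Unix it goes straight to HandleCrash() and the longjmp
  // above unwinds the stack. Either way RunSafely() returns false and the
  // process itself keeps running.
  return CRC.RunSafely([] { llvm::sys::Process::Exit(2); });
}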
+ CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *)Impl; assert(CRCI && "Crash recovery context never initialized!"); - // As per convention, -2 indicates a crash or timeout as opposed to failure to - // execute (see llvm/include/llvm/Support/Program.h) - CRCI->HandleCrash(-2, 0); + CRCI->HandleCrash(RetCode, 0 /*no sig num*/); +#endif + llvm_unreachable("Most likely setjmp wasn't called!"); } // FIXME: Portability. diff --git a/llvm/lib/Support/ErrorHandling.cpp b/llvm/lib/Support/ErrorHandling.cpp index 0f13f7a536f1..a9463024c420 100644 --- a/llvm/lib/Support/ErrorHandling.cpp +++ b/llvm/lib/Support/ErrorHandling.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" +#include "llvm/Support/Process.h" #include "llvm/Support/Signals.h" #include "llvm/Support/Threading.h" #include "llvm/Support/WindowsError.h" @@ -122,7 +123,7 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) { // files registered with RemoveFileOnSignal. sys::RunInterruptHandlers(); - exit(1); + sys::Process::Exit(1); } void llvm::install_bad_alloc_error_handler(fatal_error_handler_t handler, diff --git a/llvm/lib/Support/Process.cpp b/llvm/lib/Support/Process.cpp index 5b6471008159..509512f643d3 100644 --- a/llvm/lib/Support/Process.cpp +++ b/llvm/lib/Support/Process.cpp @@ -13,8 +13,9 @@ #include "llvm/Support/Process.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Config/llvm-config.h" #include "llvm/Config/config.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/CrashRecoveryContext.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" @@ -88,6 +89,13 @@ static bool coreFilesPrevented = !LLVM_ENABLE_CRASH_DUMPS; bool Process::AreCoreFilesPrevented() { return coreFilesPrevented; } +LLVM_ATTRIBUTE_NORETURN +void Process::Exit(int RetCode) { + if (CrashRecoveryContext *CRC = CrashRecoveryContext::GetCurrent()) + CRC->HandleExit(RetCode); + ::exit(RetCode); +} + // Include the platform-specific parts of this class. #ifdef LLVM_ON_UNIX #include "Unix/Process.inc" diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc index 8b525f1bd4ac..09e19ae41f1a 100644 --- a/llvm/lib/Support/Windows/Signals.inc +++ b/llvm/lib/Support/Windows/Signals.inc @@ -820,7 +820,13 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) { << "\n"; } - LocalPrintStackTrace(llvm::errs(), ep ? ep->ContextRecord : nullptr); + // Stack unwinding appears to modify the context. Copy it to preserve the + // caller's context. + CONTEXT ContextCopy; + if (ep) + memcpy(&ContextCopy, ep->ContextRecord, sizeof(ContextCopy)); + + LocalPrintStackTrace(llvm::errs(), ep ? &ContextCopy : nullptr); return EXCEPTION_EXECUTE_HANDLER; } diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index b8953583a310..6da089d1859a 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -1000,6 +1000,26 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { switch (MI->getOpcode()) { default: break; + case AArch64::HINT: { + // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for + // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be + // non-empty. If MI is the initial BTI, place the + // __patchable_function_entries label after BTI. 
+ if (CurrentPatchableFunctionEntrySym && + CurrentPatchableFunctionEntrySym == CurrentFnBegin && + MI == &MF->front().front()) { + int64_t Imm = MI->getOperand(0).getImm(); + if ((Imm & 32) && (Imm & 6)) { + MCInst Inst; + MCInstLowering.Lower(MI, Inst); + EmitToStreamer(*OutStreamer, Inst); + CurrentPatchableFunctionEntrySym = createTempSymbol("patch"); + OutStreamer->EmitLabel(CurrentPatchableFunctionEntrySym); + return; + } + } + break; + } case AArch64::MOVMCSym: { Register DestReg = MI->getOperand(0).getReg(); const MachineOperand &MO_Sym = MI->getOperand(1); diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index bc91d628f0b4..cbca29b63b70 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -66,6 +66,10 @@ static cl::opt LdStLimit("aarch64-load-store-scan-limit", static cl::opt UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden); +// Enable register renaming to find additional store pairing opportunities. +static cl::opt EnableRenaming("aarch64-load-store-renaming", + cl::init(false), cl::Hidden); + #define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass" namespace { @@ -1446,6 +1450,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI); Optional MaybeCanRename = None; + if (!EnableRenaming) + MaybeCanRename = {false}; + SmallPtrSet RequiredClasses; LiveRegUnits UsedInBetween; UsedInBetween.init(*TRI); diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 6f4569a49783..131219ca6944 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -183,7 +183,21 @@ class AArch64TTIImpl : public BasicTTIImplBase { bool &AllowPromotionWithoutCommonHeader); bool shouldExpandReduction(const IntrinsicInst *II) const { - return false; + switch (II->getIntrinsicID()) { + case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::experimental_vector_reduce_v2_fmul: + // We don't have legalization support for ordered FP reductions. + return !II->getFastMathFlags().allowReassoc(); + + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: + // Lowering asserts that there are no NaNs. + return !II->getFastMathFlags().noNaNs(); + + default: + // Don't expand anything else, let legalization deal with it. 
+ return false; + } } unsigned getGISelRematGlobalCost() const { diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index fbed51de0ea4..a55a1747cafe 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -156,9 +156,6 @@ extern char &SIWholeQuadModeID; void initializeSILowerControlFlowPass(PassRegistry &); extern char &SILowerControlFlowID; -void initializeSIRemoveShortExecBranchesPass(PassRegistry &); -extern char &SIRemoveShortExecBranchesID; - void initializeSIInsertSkipsPass(PassRegistry &); extern char &SIInsertSkipsPassID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index eb30d659bf0b..c8dc6f6e3bf4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -228,7 +228,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSIModeRegisterPass(*PR); initializeSIWholeQuadModePass(*PR); initializeSILowerControlFlowPass(*PR); - initializeSIRemoveShortExecBranchesPass(*PR); initializeSIInsertSkipsPass(*PR); initializeSIMemoryLegalizerPass(*PR); initializeSIOptimizeExecMaskingPass(*PR); @@ -994,7 +993,6 @@ void GCNPassConfig::addPreEmitPass() { // be better for it to emit S_NOP when possible. addPass(&PostRAHazardRecognizerID); - addPass(&SIRemoveShortExecBranchesID); addPass(&SIInsertSkipsPassID); addPass(&BranchRelaxationPassID); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp index 191f603a66d6..01bb60f07f2e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp @@ -34,6 +34,7 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -117,24 +118,58 @@ static bool isUniformlyReached(const LegacyDivergenceAnalysis &DA, return true; } +static void removeDoneExport(Function &F) { + ConstantInt *BoolFalse = ConstantInt::getFalse(F.getContext()); + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + if (IntrinsicInst *Intrin = llvm::dyn_cast(&I)) { + if (Intrin->getIntrinsicID() == Intrinsic::amdgcn_exp) { + Intrin->setArgOperand(6, BoolFalse); // done + } else if (Intrin->getIntrinsicID() == Intrinsic::amdgcn_exp_compr) { + Intrin->setArgOperand(4, BoolFalse); // done + } + } + } + } +} + static BasicBlock *unifyReturnBlockSet(Function &F, ArrayRef ReturningBlocks, + bool InsertExport, const TargetTransformInfo &TTI, StringRef Name) { // Otherwise, we need to insert a new basic block into the function, add a PHI // nodes (if the function returns values), and convert all of the return // instructions into unconditional branches. BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), Name, &F); + IRBuilder<> B(NewRetBlock); + + if (InsertExport) { + // Ensure that there's only one "done" export in the shader by removing the + // "done" bit set on the original final export. More than one "done" export + // can lead to undefined behavior. 
+ removeDoneExport(F); + + Value *Undef = UndefValue::get(B.getFloatTy()); + B.CreateIntrinsic(Intrinsic::amdgcn_exp, { B.getFloatTy() }, + { + B.getInt32(9), // target, SQ_EXP_NULL + B.getInt32(0), // enabled channels + Undef, Undef, Undef, Undef, // values + B.getTrue(), // done + B.getTrue(), // valid mask + }); + } PHINode *PN = nullptr; if (F.getReturnType()->isVoidTy()) { - ReturnInst::Create(F.getContext(), nullptr, NewRetBlock); + B.CreateRetVoid(); } else { // If the function doesn't return void... add a PHI node to the block... - PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(), - "UnifiedRetVal"); - NewRetBlock->getInstList().push_back(PN); - ReturnInst::Create(F.getContext(), PN, NewRetBlock); + PN = B.CreatePHI(F.getReturnType(), ReturningBlocks.size(), + "UnifiedRetVal"); + assert(!InsertExport); + B.CreateRet(PN); } // Loop over all of the blocks, replacing the return instruction with an @@ -173,6 +208,8 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) { // Dummy return block for infinite loop. BasicBlock *DummyReturnBB = nullptr; + bool InsertExport = false; + for (BasicBlock *BB : PDT.getRoots()) { if (isa(BB->getTerminator())) { if (!isUniformlyReached(DA, *BB)) @@ -188,6 +225,36 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) { "DummyReturnBlock", &F); Type *RetTy = F.getReturnType(); Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy); + + // For pixel shaders, the producer guarantees that an export is + // executed before each return instruction. However, if there is an + // infinite loop and we insert a return ourselves, we need to uphold + // that guarantee by inserting a null export. This can happen e.g. in + // an infinite loop with kill instructions, which is supposed to + // terminate. However, we don't need to do this if there is a non-void + // return value, since then there is an epilog afterwards which will + // still export. + // + // Note: In the case where only some threads enter the infinite loop, + // this can result in the null export happening redundantly after the + // original exports. However, The last "real" export happens after all + // the threads that didn't enter an infinite loop converged, which + // means that the only extra threads to execute the null export are + // threads that entered the infinite loop, and they only could've + // exited through being killed which sets their exec bit to 0. + // Therefore, unless there's an actual infinite loop, which can have + // invalid results, or there's a kill after the last export, which we + // assume the frontend won't do, this export will have the same exec + // mask as the last "real" export, and therefore the valid mask will be + // overwritten with the same value and will still be correct. Also, + // even though this forces an extra unnecessary export wait, we assume + // that this happens rare enough in practice to that we don't have to + // worry about performance. 
+ if (F.getCallingConv() == CallingConv::AMDGPU_PS && + RetTy->isVoidTy()) { + InsertExport = true; + } + ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB); ReturningBlocks.push_back(DummyReturnBB); } @@ -260,6 +327,6 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) { const TargetTransformInfo &TTI = getAnalysis().getTTI(F); - unifyReturnBlockSet(F, ReturningBlocks, TTI, "UnifiedReturnBlock"); + unifyReturnBlockSet(F, ReturningBlocks, InsertExport, TTI, "UnifiedReturnBlock"); return true; } diff --git a/llvm/lib/Target/AMDGPU/CaymanInstructions.td b/llvm/lib/Target/AMDGPU/CaymanInstructions.td index 1a526675164a..e2978624811d 100644 --- a/llvm/lib/Target/AMDGPU/CaymanInstructions.td +++ b/llvm/lib/Target/AMDGPU/CaymanInstructions.td @@ -50,6 +50,8 @@ def COS_cm : COS_Common<0x8E>; def : RsqPat; +def : SqrtPat; + def : POW_Common ; defm DIV_cm : DIV_Common; @@ -70,8 +72,6 @@ def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> { -def : R600Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; - class RAT_STORE_DWORD mask> : CF_MEM_RAT_CACHELESS <0x14, 0, mask, (ins rc:$rw_gpr, R600_TReg32_X:$index_gpr), diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td index 792e26d21f98..88e554ae0bcc 100644 --- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td @@ -118,11 +118,12 @@ def LOG_IEEE_eg : LOG_IEEE_Common<0x83>; def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>; def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; def : RsqPat; +def : SqrtPat; + def SIN_eg : SIN_Common<0x8D>; def COS_eg : COS_Common<0x8E>; def : POW_Common ; -def : EGPat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; } // End SubtargetPredicate = isEG //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td index cbdf0de44f87..869c183e2245 100644 --- a/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -1233,6 +1233,11 @@ def : R600Pat< def : RcpPat; } +class SqrtPat : R600Pat < + (fsqrt f32:$src), + (RecipInst (RsqInst $src)) +>; + //===----------------------------------------------------------------------===// // R600 / R700 Instructions //===----------------------------------------------------------------------===// @@ -1272,8 +1277,8 @@ let Predicates = [isR600] in { defm DIV_r600 : DIV_Common; def : POW_Common ; - def : R600Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>; def : RsqPat; + def : SqrtPat; def R600_ExportSwz : ExportSwzInst { let Word1{20-17} = 0; // BURST_COUNT diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp index 80c044ec00cb..87e63fcc4a04 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -41,7 +41,7 @@ using namespace llvm; #define DEBUG_TYPE "si-insert-skips" static cl::opt SkipThresholdFlag( - "amdgpu-skip-threshold-legacy", + "amdgpu-skip-threshold", cl::desc("Number of instructions before jumping over divergent control flow"), cl::init(12), cl::Hidden); @@ -466,9 +466,6 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) { MachineInstr &MI = *I; switch (MI.getOpcode()) { - case AMDGPU::S_CBRANCH_EXECZ: - ExecBranchStack.push_back(MI.getOperand(0).getMBB()); - break; case AMDGPU::SI_MASK_BRANCH: 
ExecBranchStack.push_back(MI.getOperand(0).getMBB()); MadeChange |= skipMaskBranch(MI, MBB); diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 61d2719a3aad..bf052dc3c930 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -244,9 +244,9 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) { BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec) .addReg(Tmp, RegState::Kill); - // Insert the S_CBRANCH_EXECZ instruction which will be optimized later - // during SIRemoveShortExecBranches. - MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ)) + // Insert a pseudo terminator to help keep the verifier happy. This will also + // be used later when inserting skips. + MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH)) .add(MI.getOperand(2)); if (!LIS) { @@ -323,8 +323,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) { .addReg(DstReg); MachineInstr *Branch = - BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ)) - .addMBB(DestBB); + BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH)) + .addMBB(DestBB); if (!LIS) { MI.eraseFromParent(); diff --git a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp deleted file mode 100644 index 51779e97ac62..000000000000 --- a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp +++ /dev/null @@ -1,158 +0,0 @@ -//===-- SIRemoveShortExecBranches.cpp ------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// This pass optmizes the s_cbranch_execz instructions. -/// The pass removes this skip instruction for short branches, -/// if there is no unwanted sideeffect in the fallthrough code sequence. -/// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "AMDGPUSubtarget.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "SIInstrInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Support/CommandLine.h" - -using namespace llvm; - -#define DEBUG_TYPE "si-remove-short-exec-branches" - -static unsigned SkipThreshold; - -static cl::opt SkipThresholdFlag( - "amdgpu-skip-threshold", cl::Hidden, - cl::desc( - "Number of instructions before jumping over divergent control flow"), - cl::location(SkipThreshold), cl::init(12)); - -namespace { - -class SIRemoveShortExecBranches : public MachineFunctionPass { -private: - const SIInstrInfo *TII = nullptr; - bool getBlockDestinations(MachineBasicBlock &SrcMBB, - MachineBasicBlock *&TrueMBB, - MachineBasicBlock *&FalseMBB, - SmallVectorImpl &Cond); - bool mustRetainExeczBranch(const MachineBasicBlock &From, - const MachineBasicBlock &To) const; - bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB); - -public: - static char ID; - - SIRemoveShortExecBranches() : MachineFunctionPass(ID) { - initializeSIRemoveShortExecBranchesPass(*PassRegistry::getPassRegistry()); - } - - bool runOnMachineFunction(MachineFunction &MF) override; -}; - -} // End anonymous namespace. 
- -INITIALIZE_PASS(SIRemoveShortExecBranches, DEBUG_TYPE, - "SI remove short exec branches", false, false) - -char SIRemoveShortExecBranches::ID = 0; - -char &llvm::SIRemoveShortExecBranchesID = SIRemoveShortExecBranches::ID; - -bool SIRemoveShortExecBranches::getBlockDestinations( - MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB, - MachineBasicBlock *&FalseMBB, SmallVectorImpl &Cond) { - if (TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond)) - return false; - - if (!FalseMBB) - FalseMBB = SrcMBB.getNextNode(); - - return true; -} - -bool SIRemoveShortExecBranches::mustRetainExeczBranch( - const MachineBasicBlock &From, const MachineBasicBlock &To) const { - unsigned NumInstr = 0; - const MachineFunction *MF = From.getParent(); - - for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end(); - MBBI != End && MBBI != ToI; ++MBBI) { - const MachineBasicBlock &MBB = *MBBI; - - for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { - // When a uniform loop is inside non-uniform control flow, the branch - // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken - // when EXEC = 0. We should skip the loop lest it becomes infinite. - if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ || - I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ) - return true; - - if (TII->hasUnwantedEffectsWhenEXECEmpty(*I)) - return true; - - // These instructions are potentially expensive even if EXEC = 0. - if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) || - I->getOpcode() == AMDGPU::S_WAITCNT) - return true; - - ++NumInstr; - if (NumInstr >= SkipThreshold) - return true; - } - } - - return false; -} - -// Returns true if the skip branch instruction is removed. -bool SIRemoveShortExecBranches::removeExeczBranch(MachineInstr &MI, - MachineBasicBlock &SrcMBB) { - MachineBasicBlock *TrueMBB = nullptr; - MachineBasicBlock *FalseMBB = nullptr; - SmallVector Cond; - - if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond)) - return false; - - // Consider only the forward branches. 
- if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) || - mustRetainExeczBranch(*FalseMBB, *TrueMBB)) - return false; - - LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI); - MI.eraseFromParent(); - SrcMBB.removeSuccessor(TrueMBB); - - return true; -} - -bool SIRemoveShortExecBranches::runOnMachineFunction(MachineFunction &MF) { - const GCNSubtarget &ST = MF.getSubtarget(); - TII = ST.getInstrInfo(); - MF.RenumberBlocks(); - bool Changed = false; - - for (MachineBasicBlock &MBB : MF) { - MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); - if (MBBI == MBB.end()) - continue; - - MachineInstr &MI = *MBBI; - switch (MI.getOpcode()) { - case AMDGPU::S_CBRANCH_EXECZ: - Changed = removeExeczBranch(MI, MBB); - break; - default: - break; - } - } - - return Changed; -} diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 5271bc3aacc6..8b21b9346987 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -559,7 +559,7 @@ bool isReadOnlySegment(const GlobalValue *GV) { } bool shouldEmitConstantsToTextSection(const Triple &TT) { - return TT.getOS() == Triple::AMDPAL; + return TT.getOS() == Triple::AMDPAL || TT.getArch() == Triple::r600; } int getIntegerAttribute(const Function &F, StringRef Name, int Default) { diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index 634fb89b8e89..66ad120a111f 100644 --- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -330,8 +330,8 @@ void ARMConstantIslands::verify() { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// print block size and offset information - debugging LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() { - BBInfoVector &BBInfo = BBUtils->getBBInfo(); LLVM_DEBUG({ + BBInfoVector &BBInfo = BBUtils->getBBInfo(); for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) { const BasicBlockInfo &BBI = BBInfo[J]; dbgs() << format("%08x %bb.%u\t", BBI.Offset, J) diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 2c3ac816219f..de4377ec5a47 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1952,24 +1952,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } - case ARM::LOADDUAL: - case ARM::STOREDUAL: { - Register PairReg = MI.getOperand(0).getReg(); - - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD)) - .addReg(TRI->getSubReg(PairReg, ARM::gsub_0), - Opcode == ARM::LOADDUAL ? RegState::Define : 0) - .addReg(TRI->getSubReg(PairReg, ARM::gsub_1), - Opcode == ARM::LOADDUAL ? 
RegState::Define : 0); - for (unsigned i = 1; i < MI.getNumOperands(); i++) - MIB.add(MI.getOperand(i)); - MIB.add(predOps(ARMCC::AL)); - MIB.cloneMemRefs(MI); - MI.eraseFromParent(); - return true; - } } } diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 76a9ac12062d..9b06987178d8 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -145,8 +145,6 @@ class ARMDAGToDAGISel : public SelectionDAGISel { // Thumb 2 Addressing Modes: bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); - template - bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, @@ -1296,33 +1294,6 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, return true; } -template -bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base, - SDValue &OffImm) { - if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { - int RHSC; - if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) { - Base = N.getOperand(0); - if (Base.getOpcode() == ISD::FrameIndex) { - int FI = cast(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex( - FI, TLI->getPointerTy(CurDAG->getDataLayout())); - } - - if (N.getOpcode() == ISD::SUB) - RHSC = -RHSC; - OffImm = - CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); - return true; - } - } - - // Base only. - Base = N; - OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); - return true; -} - bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm) { // Match simple R - imm8 operands. @@ -3515,26 +3486,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { CurDAG->RemoveDeadNode(N); return; } - case ARMISD::LDRD: { - if (Subtarget->isThumb2()) - break; // TableGen handles isel in this case. 
- SDValue Base, RegOffset, ImmOffset; - const SDValue &Chain = N->getOperand(0); - const SDValue &Addr = N->getOperand(1); - SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); - SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain}; - SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl, - {MVT::Untyped, MVT::Other}, Ops); - SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, - SDValue(New, 0)); - SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, - SDValue(New, 0)); - ReplaceUses(SDValue(N, 0), Lo); - ReplaceUses(SDValue(N, 1), Hi); - ReplaceUses(SDValue(N, 2), SDValue(New, 1)); - CurDAG->RemoveDeadNode(N); - return; - } case ARMISD::LOOP_DEC: { SDValue Ops[] = { N->getOperand(1), N->getOperand(2), diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index cf738cd66434..1e6f7d889201 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1073,8 +1073,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRA, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); - setOperationAction(ISD::LOAD, MVT::i64, Custom); - setOperationAction(ISD::STORE, MVT::i64, Custom); // MVE lowers 64 bit shifts to lsll and lsrl // assuming that ISD::SRL and SRA of i64 are already marked custom @@ -1598,9 +1596,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; - case ARMISD::LDRD: return "ARMISD::LDRD"; - case ARMISD::STRD: return "ARMISD::STRD"; - case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK"; case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK"; @@ -9088,24 +9083,6 @@ static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) { return DAG.getMergeValues({Pred, Load.getValue(1)}, dl); } -void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl &Results, - SelectionDAG &DAG) const { - LoadSDNode *LD = cast(N); - EVT MemVT = LD->getMemoryVT(); - assert(LD->isUnindexed() && "Loads should be unindexed at this point."); - - if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() && - !Subtarget->isThumb1Only() && LD->isVolatile()) { - SDLoc dl(N); - SDValue Result = DAG.getMemIntrinsicNode( - ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}), - {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand()); - SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, - Result.getValue(0), Result.getValue(1)); - Results.append({Pair, Result.getValue(2)}); - } -} - static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) { StoreSDNode *ST = cast(Op.getNode()); EVT MemVT = ST->getMemoryVT(); @@ -9135,34 +9112,6 @@ static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) { ST->getMemOperand()); } -static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget) { - StoreSDNode *ST = cast(Op.getNode()); - EVT MemVT = ST->getMemoryVT(); - assert(ST->isUnindexed() && "Stores should be unindexed at this point."); - - if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() && - !Subtarget->isThumb1Only() && ST->isVolatile()) { - SDNode *N = Op.getNode(); - SDLoc dl(N); - - SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(), - DAG.getTargetConstant(0, dl, MVT::i32)); - SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(), - DAG.getTargetConstant(1, dl, MVT::i32)); - - return 
DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other), - {ST->getChain(), Lo, Hi, ST->getBasePtr()}, - MemVT, ST->getMemOperand()); - } else if (Subtarget->hasMVEIntegerOps() && - ((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || - MemVT == MVT::v16i1))) { - return LowerPredicateStore(Op, DAG); - } - - return SDValue(); -} - static bool isZeroVector(SDValue N) { return (ISD::isBuildVectorAllZeros(N.getNode()) || (N->getOpcode() == ARMISD::VMOVIMM && @@ -9350,7 +9299,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::LOAD: return LowerPredicateLoad(Op, DAG); case ISD::STORE: - return LowerSTORE(Op, DAG, Subtarget); + return LowerPredicateStore(Op, DAG); case ISD::MLOAD: return LowerMLOAD(Op, DAG); case ISD::ATOMIC_LOAD: @@ -9452,9 +9401,7 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::ABS: lowerABS(N, Results, DAG); return ; - case ISD::LOAD: - LowerLOAD(N, Results, DAG); - break; + } if (Res.getNode()) Results.push_back(Res); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 1baa22a4fa56..cc74e5d875d8 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -278,11 +278,7 @@ class VectorType; VST4_UPD, VST2LN_UPD, VST3LN_UPD, - VST4LN_UPD, - - // Load/Store of dual registers - LDRD, - STRD + VST4LN_UPD }; } // end namespace ARMISD @@ -735,8 +731,6 @@ class VectorType; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; void lowerABS(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const; - void LowerLOAD(SDNode *N, SmallVectorImpl &Results, - SelectionDAG &DAG) const; Register getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index ce67af6f1b49..3efe85a7d45c 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -243,12 +243,6 @@ def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>; def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>; def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>; -def SDT_ARMldrd : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; -def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; - -def SDT_ARMstrd : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; -def ARMstrd : SDNode<"ARMISD::STRD", SDT_ARMstrd, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; - // Vector operations shared between NEON and MVE def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; @@ -2701,14 +2695,6 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { Requires<[IsARM, HasV5TE]>; } -let mayLoad = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in { -def LOADDUAL : ARMPseudoInst<(outs GPRPairOp:$Rt), (ins addrmode3:$addr), - 64, IIC_iLoad_d_r, []>, - Requires<[IsARM, HasV5TE]> { - let AM = AddrMode3; -} -} - def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), NoItinerary, "lda", "\t$Rt, $addr", []>; def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), @@ -2984,19 +2970,6 @@ let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { } } -let mayStore = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in { -def STOREDUAL : ARMPseudoInst<(outs), (ins GPRPairOp:$Rt, addrmode3:$addr), - 64, IIC_iStore_d_r, []>, - Requires<[IsARM, HasV5TE]> { - let AM = AddrMode3; 
-} -} - -let Predicates = [IsARM, HasV5TE] in { -def : Pat<(ARMstrd GPR:$Rt, GPR:$Rt2, addrmode3:$addr), - (STOREDUAL (REG_SEQUENCE GPRPair, GPR:$Rt, gsub_0, GPR:$Rt2, gsub_1), addrmode3:$addr)>; -} - // Indexed stores multiclass AI2_stridx { diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index 4193e8147f47..c5aae235f25d 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -270,8 +270,7 @@ def t2am_imm8_offset : MemOperand, // t2addrmode_imm8s4 := reg +/- (imm8 << 2) def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";} -class T2AddrMode_Imm8s4 : MemOperand, - ComplexPattern<i32, 2, "SelectT2AddrModeImm8s4", []> { +class T2AddrMode_Imm8s4 : MemOperand { let EncoderMethod = "getT2AddrModeImm8s4OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8s4"; let ParserMatchClass = MemImm8s4OffsetAsmOperand; @@ -1449,8 +1448,7 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { // Load doubleword def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2), (ins t2addrmode_imm8s4:$addr), - IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", - [(set rGPR:$Rt, rGPR:$Rt2, (ARMldrd t2addrmode_imm8s4:$addr))]>, + IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>, Sched<[WriteLd]>; } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 @@ -1631,8 +1629,7 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si, let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr), - IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", - [(ARMstrd rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr)]>, + IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>, Sched<[WriteST]>; // Indexed stores diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 880588adfdfd..f66083eaf187 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -171,7 +171,26 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> { TTI::ReductionFlags Flags) const; bool shouldExpandReduction(const IntrinsicInst *II) const { - return false; + switch (II->getIntrinsicID()) { + case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::experimental_vector_reduce_v2_fmul: + // We don't have legalization support for ordered FP reductions. + if (!II->getFastMathFlags().allowReassoc()) + return true; + // Can't legalize reductions with soft floats. + return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs(); + + case Intrinsic::experimental_vector_reduce_fmin: + case Intrinsic::experimental_vector_reduce_fmax: + // Can't legalize reductions with soft floats, and NoNan will create + // fminimum which we do not know how to lower. + return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs() || + !II->getFastMathFlags().noNaNs(); + + default: + // Don't expand anything else, let legalization deal with it. + return false; + } } int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h index b81bf4e1320d..cbae4675cb14 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.h +++ b/llvm/lib/Target/BPF/BPFISelLowering.h @@ -110,6 +110,19 @@ class BPFTargetLowering : public TargetLowering { return true; } + // Prevent reducing load width during SelectionDAG phase. + // Otherwise, we may transform the following + // ctx = ctx + reloc_offset + // ...
(*(u32 *)ctx) & 0x8000... + // to + // ctx = ctx + reloc_offset + // ... (*(u8 *)(ctx + 1)) & 0x80 ... + // which will be rejected by the verifier. + bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, + EVT NewVT) const override { + return false; + } + unsigned EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB, unsigned Reg, bool isSigned) const; diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp index 5310f0f07b65..29abc9303a62 100644 --- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp +++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp @@ -70,9 +70,10 @@ struct BPFMISimplifyPatchable : public MachineFunctionPass { public: // Main entry point for this pass. bool runOnMachineFunction(MachineFunction &MF) override { - if (!skipFunction(MF.getFunction())) { - initialize(MF); - } + if (skipFunction(MF.getFunction())) + return false; + + initialize(MF); return removeLD(); } }; diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 53562f42a184..c7efdf42a7c6 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -195,12 +195,13 @@ class RISCVAsmParser : public MCTargetAsmParser { Parser.addAliasForDirective(".dword", ".8byte"); setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); - if (Options.ABIName.back() == 'f' && + auto ABIName = StringRef(Options.ABIName); + if (ABIName.endswith("f") && !getSTI().getFeatureBits()[RISCV::FeatureStdExtF]) { errs() << "Hard-float 'f' ABI can't be used for a target that " "doesn't support the F instruction set extension (ignoring " "target-abi)\n"; - } else if (Options.ABIName.back() == 'd' && + } else if (ABIName.endswith("d") && !getSTI().getFeatureBits()[RISCV::FeatureStdExtD]) { errs() << "Hard-float 'd' ABI can't be used for a target that " "doesn't support the D instruction set extension (ignoring " diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 82afa13aece3..770e883221d1 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -92,10 +92,13 @@ include "RISCVSystemOperands.td" // Registers, calling conventions, instruction descriptions. //===----------------------------------------------------------------------===// +include "RISCVSchedule.td" include "RISCVRegisterInfo.td" include "RISCVCallingConv.td" include "RISCVInstrInfo.td" include "RISCVRegisterBanks.td" +include "RISCVSchedRocket32.td" +include "RISCVSchedRocket64.td" //===----------------------------------------------------------------------===// // RISC-V processors supported. @@ -106,6 +109,12 @@ def : ProcessorModel<"generic-rv32", NoSchedModel, [FeatureRVCHints]>; def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit, FeatureRVCHints]>; +def : ProcessorModel<"rocket-rv32", Rocket32Model, [FeatureRVCHints]>; + +def : ProcessorModel<"rocket-rv64", Rocket64Model, [Feature64Bit, + FeatureRVCHints]>; + + //===----------------------------------------------------------------------===// // Define the RISC-V target. 
//===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index 7229ebfe1db0..3ed10cca5377 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -103,7 +103,8 @@ class RVInst pattern, string opcodestr = "", string argstr = ""> - : RVInst { + : RVInst, + Sched<[]> { let isPseudo = 1; let isCodeGenOnly = 1; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 8e9ad4965583..81f1abe8337e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -298,7 +298,8 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class BranchCC_rri funct3, string opcodestr> : RVInstB { + opcodestr, "$rs1, $rs2, $imm12">, + Sched<[WriteJmp]> { let isBranch = 1; let isTerminator = 1; } @@ -320,13 +321,15 @@ class Store_rri funct3, string opcodestr> let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class ALU_ri funct3, string opcodestr> : RVInstI; + opcodestr, "$rd, $rs1, $imm12">, + Sched<[WriteIALU, ReadIALU]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class Shift_ri funct3, string opcodestr> : RVInstIShift; + "$rd, $rs1, $shamt">, + Sched<[WriteShift, ReadShift]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class ALU_rr funct7, bits<3> funct3, string opcodestr> @@ -336,19 +339,20 @@ class ALU_rr funct7, bits<3> funct3, string opcodestr> let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in class CSR_ir funct3, string opcodestr> : RVInstI; + opcodestr, "$rd, $imm12, $rs1">, Sched<[WriteCSR, ReadCSR]>; let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in class CSR_ii funct3, string opcodestr> : RVInstI; + opcodestr, "$rd, $imm12, $rs1">, Sched<[WriteCSR]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class ShiftW_ri funct3, string opcodestr> : RVInstIShiftW; + "$rd, $rs1, $shamt">, + Sched<[WriteShift32, ReadShift32]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class ALUW_rr funct7, bits<3> funct3, string opcodestr> @@ -367,19 +371,20 @@ class Priv funct7> let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { let isReMaterializable = 1, isAsCheapAsAMove = 1 in def LUI : RVInstU; + "lui", "$rd, $imm20">, Sched<[WriteIALU]>; def AUIPC : RVInstU; + "auipc", "$rd, $imm20">, Sched<[WriteIALU]>; let isCall = 1 in def JAL : RVInstJ; + "jal", "$rd, $imm20">, Sched<[WriteJal]>; let isCall = 1 in def JALR : RVInstI<0b000, OPC_JALR, (outs GPR:$rd), (ins GPR:$rs1, simm12:$imm12), - "jalr", "$rd, ${imm12}(${rs1})">; + "jalr", "$rd, ${imm12}(${rs1})">, + Sched<[WriteJalr, ReadJalr]>; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 def BEQ : BranchCC_rri<0b000, "beq">; @@ -389,15 +394,15 @@ def BGE : BranchCC_rri<0b101, "bge">; def BLTU : BranchCC_rri<0b110, "bltu">; def BGEU : BranchCC_rri<0b111, "bgeu">; -def LB : Load_ri<0b000, "lb">; -def LH : Load_ri<0b001, "lh">; -def LW : Load_ri<0b010, "lw">; -def LBU : Load_ri<0b100, "lbu">; -def LHU : Load_ri<0b101, "lhu">; +def LB : Load_ri<0b000, "lb">, Sched<[WriteLDB, ReadMemBase]>; +def LH : Load_ri<0b001, "lh">, Sched<[WriteLDH, ReadMemBase]>; +def LW : Load_ri<0b010, "lw">, Sched<[WriteLDW, ReadMemBase]>; +def LBU : Load_ri<0b100, "lbu">, Sched<[WriteLDB, ReadMemBase]>; +def LHU : Load_ri<0b101, "lhu">, Sched<[WriteLDH, ReadMemBase]>; -def SB : Store_rri<0b000, "sb">; -def SH : Store_rri<0b001, "sh">; -def SW : Store_rri<0b010, "sw">; +def SB : Store_rri<0b000, 
"sb">, Sched<[WriteSTB, ReadStoreData, ReadMemBase]>; +def SH : Store_rri<0b001, "sh">, Sched<[WriteSTH, ReadStoreData, ReadMemBase]>; +def SW : Store_rri<0b010, "sw">, Sched<[WriteSTW, ReadStoreData, ReadMemBase]>; // ADDI isn't always rematerializable, but isReMaterializable will be used as // a hint which is verified in isReallyTriviallyReMaterializable. @@ -418,21 +423,21 @@ def SLLI : Shift_ri<0, 0b001, "slli">; def SRLI : Shift_ri<0, 0b101, "srli">; def SRAI : Shift_ri<1, 0b101, "srai">; -def ADD : ALU_rr<0b0000000, 0b000, "add">; -def SUB : ALU_rr<0b0100000, 0b000, "sub">; -def SLL : ALU_rr<0b0000000, 0b001, "sll">; -def SLT : ALU_rr<0b0000000, 0b010, "slt">; -def SLTU : ALU_rr<0b0000000, 0b011, "sltu">; -def XOR : ALU_rr<0b0000000, 0b100, "xor">; -def SRL : ALU_rr<0b0000000, 0b101, "srl">; -def SRA : ALU_rr<0b0100000, 0b101, "sra">; -def OR : ALU_rr<0b0000000, 0b110, "or">; -def AND : ALU_rr<0b0000000, 0b111, "and">; +def ADD : ALU_rr<0b0000000, 0b000, "add">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def SUB : ALU_rr<0b0100000, 0b000, "sub">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def SLL : ALU_rr<0b0000000, 0b001, "sll">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def SLT : ALU_rr<0b0000000, 0b010, "slt">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def SLTU : ALU_rr<0b0000000, 0b011, "sltu">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def XOR : ALU_rr<0b0000000, 0b100, "xor">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def SRL : ALU_rr<0b0000000, 0b101, "srl">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def SRA : ALU_rr<0b0100000, 0b101, "sra">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def OR : ALU_rr<0b0000000, 0b110, "or">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def AND : ALU_rr<0b0000000, 0b111, "and">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in { def FENCE : RVInstI<0b000, OPC_MISC_MEM, (outs), (ins fencearg:$pred, fencearg:$succ), - "fence", "$pred, $succ"> { + "fence", "$pred, $succ">, Sched<[]> { bits<4> pred; bits<4> succ; @@ -441,25 +446,26 @@ def FENCE : RVInstI<0b000, OPC_MISC_MEM, (outs), let imm12 = {0b0000,pred,succ}; } -def FENCE_TSO : RVInstI<0b000, OPC_MISC_MEM, (outs), (ins), "fence.tso", ""> { +def FENCE_TSO : RVInstI<0b000, OPC_MISC_MEM, (outs), (ins), "fence.tso", "">, Sched<[]> { let rs1 = 0; let rd = 0; let imm12 = {0b1000,0b0011,0b0011}; } -def FENCE_I : RVInstI<0b001, OPC_MISC_MEM, (outs), (ins), "fence.i", ""> { +def FENCE_I : RVInstI<0b001, OPC_MISC_MEM, (outs), (ins), "fence.i", "">, Sched<[]> { let rs1 = 0; let rd = 0; let imm12 = 0; } -def ECALL : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ecall", ""> { +def ECALL : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ecall", "">, Sched<[WriteJmp]> { let rs1 = 0; let rd = 0; let imm12 = 0; } -def EBREAK : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ebreak", ""> { +def EBREAK : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ebreak", "">, + Sched<[]> { let rs1 = 0; let rd = 0; let imm12 = 1; @@ -468,7 +474,8 @@ def EBREAK : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ebreak", ""> { // This is a de facto standard (as set by GNU binutils) 32-bit unimplemented // instruction (i.e., it should always trap, if your implementation has invalid // instruction traps). 
-def UNIMP : RVInstI<0b001, OPC_SYSTEM, (outs), (ins), "unimp", ""> { +def UNIMP : RVInstI<0b001, OPC_SYSTEM, (outs), (ins), "unimp", "">, + Sched<[]> { let rs1 = 0; let rd = 0; let imm12 = 0b110000000000; @@ -486,24 +493,30 @@ def CSRRCI : CSR_ii<0b111, "csrrci">; /// RV64I instructions let Predicates = [IsRV64] in { -def LWU : Load_ri<0b110, "lwu">; -def LD : Load_ri<0b011, "ld">; -def SD : Store_rri<0b011, "sd">; +def LWU : Load_ri<0b110, "lwu">, Sched<[WriteLDWU, ReadMemBase]>; +def LD : Load_ri<0b011, "ld">, Sched<[WriteLDD, ReadMemBase]>; +def SD : Store_rri<0b011, "sd">, Sched<[WriteSTD, ReadStoreData, ReadMemBase]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def ADDIW : RVInstI<0b000, OPC_OP_IMM_32, (outs GPR:$rd), (ins GPR:$rs1, simm12:$imm12), - "addiw", "$rd, $rs1, $imm12">; + "addiw", "$rd, $rs1, $imm12">, + Sched<[WriteIALU32, ReadIALU32]>; def SLLIW : ShiftW_ri<0, 0b001, "slliw">; def SRLIW : ShiftW_ri<0, 0b101, "srliw">; def SRAIW : ShiftW_ri<1, 0b101, "sraiw">; -def ADDW : ALUW_rr<0b0000000, 0b000, "addw">; -def SUBW : ALUW_rr<0b0100000, 0b000, "subw">; -def SLLW : ALUW_rr<0b0000000, 0b001, "sllw">; -def SRLW : ALUW_rr<0b0000000, 0b101, "srlw">; -def SRAW : ALUW_rr<0b0100000, 0b101, "sraw">; +def ADDW : ALUW_rr<0b0000000, 0b000, "addw">, + Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; +def SUBW : ALUW_rr<0b0100000, 0b000, "subw">, + Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; +def SLLW : ALUW_rr<0b0000000, 0b001, "sllw">, + Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; +def SRLW : ALUW_rr<0b0000000, 0b101, "srlw">, + Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; +def SRAW : ALUW_rr<0b0100000, 0b101, "sraw">, + Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; } // Predicates = [IsRV64] //===----------------------------------------------------------------------===// @@ -511,26 +524,26 @@ def SRAW : ALUW_rr<0b0100000, 0b101, "sraw">; //===----------------------------------------------------------------------===// let isBarrier = 1, isReturn = 1, isTerminator = 1 in { -def URET : Priv<"uret", 0b0000000> { +def URET : Priv<"uret", 0b0000000>, Sched<[]> { let rd = 0; let rs1 = 0; let rs2 = 0b00010; } -def SRET : Priv<"sret", 0b0001000> { +def SRET : Priv<"sret", 0b0001000>, Sched<[]> { let rd = 0; let rs1 = 0; let rs2 = 0b00010; } -def MRET : Priv<"mret", 0b0011000> { +def MRET : Priv<"mret", 0b0011000>, Sched<[]> { let rd = 0; let rs1 = 0; let rs2 = 0b00010; } } // isBarrier = 1, isReturn = 1, isTerminator = 1 -def WFI : Priv<"wfi", 0b0001000> { +def WFI : Priv<"wfi", 0b0001000>, Sched<[]> { let rd = 0; let rs1 = 0; let rs2 = 0b00101; @@ -539,7 +552,7 @@ def WFI : Priv<"wfi", 0b0001000> { let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in def SFENCE_VMA : RVInstR<0b0001001, 0b000, OPC_SYSTEM, (outs), (ins GPR:$rs1, GPR:$rs2), - "sfence.vma", "$rs1, $rs2"> { + "sfence.vma", "$rs1, $rs2">, Sched<[]> { let rd = 0; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index 7321f4bd9d2f..de73c8df9367 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -77,31 +77,51 @@ multiclass AtomicStPat { //===----------------------------------------------------------------------===// let Predicates = [HasStdExtA] in { -defm LR_W : LR_r_aq_rl<0b010, "lr.w">; -defm SC_W : AMO_rr_aq_rl<0b00011, 0b010, "sc.w">; -defm AMOSWAP_W : AMO_rr_aq_rl<0b00001, 0b010, "amoswap.w">; -defm AMOADD_W : AMO_rr_aq_rl<0b00000, 0b010, "amoadd.w">; -defm AMOXOR_W : AMO_rr_aq_rl<0b00100, 0b010, "amoxor.w">; -defm 
AMOAND_W : AMO_rr_aq_rl<0b01100, 0b010, "amoand.w">; -defm AMOOR_W : AMO_rr_aq_rl<0b01000, 0b010, "amoor.w">; -defm AMOMIN_W : AMO_rr_aq_rl<0b10000, 0b010, "amomin.w">; -defm AMOMAX_W : AMO_rr_aq_rl<0b10100, 0b010, "amomax.w">; -defm AMOMINU_W : AMO_rr_aq_rl<0b11000, 0b010, "amominu.w">; -defm AMOMAXU_W : AMO_rr_aq_rl<0b11100, 0b010, "amomaxu.w">; +defm LR_W : LR_r_aq_rl<0b010, "lr.w">, Sched<[WriteAtomicLDW, ReadAtomicLDW]>; +defm SC_W : AMO_rr_aq_rl<0b00011, 0b010, "sc.w">, + Sched<[WriteAtomicSTW, ReadAtomicSTW, ReadAtomicSTW]>; +defm AMOSWAP_W : AMO_rr_aq_rl<0b00001, 0b010, "amoswap.w">, + Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>; +defm AMOADD_W : AMO_rr_aq_rl<0b00000, 0b010, "amoadd.w">, + Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>; +defm AMOXOR_W : AMO_rr_aq_rl<0b00100, 0b010, "amoxor.w">, + Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>; +defm AMOAND_W : AMO_rr_aq_rl<0b01100, 0b010, "amoand.w">, + Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>; +defm AMOOR_W : AMO_rr_aq_rl<0b01000, 0b010, "amoor.w">, + Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>; +defm AMOMIN_W : AMO_rr_aq_rl<0b10000, 0b010, "amomin.w">, + Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>; +defm AMOMAX_W : AMO_rr_aq_rl<0b10100, 0b010, "amomax.w">, + Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>; +defm AMOMINU_W : AMO_rr_aq_rl<0b11000, 0b010, "amominu.w">, + Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>; +defm AMOMAXU_W : AMO_rr_aq_rl<0b11100, 0b010, "amomaxu.w">, + Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>; } // Predicates = [HasStdExtA] let Predicates = [HasStdExtA, IsRV64] in { -defm LR_D : LR_r_aq_rl<0b011, "lr.d">; -defm SC_D : AMO_rr_aq_rl<0b00011, 0b011, "sc.d">; -defm AMOSWAP_D : AMO_rr_aq_rl<0b00001, 0b011, "amoswap.d">; -defm AMOADD_D : AMO_rr_aq_rl<0b00000, 0b011, "amoadd.d">; -defm AMOXOR_D : AMO_rr_aq_rl<0b00100, 0b011, "amoxor.d">; -defm AMOAND_D : AMO_rr_aq_rl<0b01100, 0b011, "amoand.d">; -defm AMOOR_D : AMO_rr_aq_rl<0b01000, 0b011, "amoor.d">; -defm AMOMIN_D : AMO_rr_aq_rl<0b10000, 0b011, "amomin.d">; -defm AMOMAX_D : AMO_rr_aq_rl<0b10100, 0b011, "amomax.d">; -defm AMOMINU_D : AMO_rr_aq_rl<0b11000, 0b011, "amominu.d">; -defm AMOMAXU_D : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">; +defm LR_D : LR_r_aq_rl<0b011, "lr.d">, Sched<[WriteAtomicLDD, ReadAtomicLDD]>; +defm SC_D : AMO_rr_aq_rl<0b00011, 0b011, "sc.d">, + Sched<[WriteAtomicSTD, ReadAtomicSTD, ReadAtomicSTD]>; +defm AMOSWAP_D : AMO_rr_aq_rl<0b00001, 0b011, "amoswap.d">, + Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>; +defm AMOADD_D : AMO_rr_aq_rl<0b00000, 0b011, "amoadd.d">, + Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>; +defm AMOXOR_D : AMO_rr_aq_rl<0b00100, 0b011, "amoxor.d">, + Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>; +defm AMOAND_D : AMO_rr_aq_rl<0b01100, 0b011, "amoand.d">, + Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>; +defm AMOOR_D : AMO_rr_aq_rl<0b01000, 0b011, "amoor.d">, + Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>; +defm AMOMIN_D : AMO_rr_aq_rl<0b10000, 0b011, "amomin.d">, + Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>; +defm AMOMAX_D : AMO_rr_aq_rl<0b10100, 0b011, "amomax.d">, + Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>; +defm AMOMINU_D : AMO_rr_aq_rl<0b11000, 0b011, "amominu.d">, + Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>; +defm AMOMAXU_D : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">, + Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>; } // Predicates = [HasStdExtA, IsRV64] 
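The scheduling hunks above all follow one convention: each instruction (or defm) gains a Sched<[...]> list naming the SchedWrite for its result operand first, then one SchedRead per source operand, in operand order (e.g. Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]> for an AMO with an address source and a data source). A minimal sketch of that convention, using the hypothetical names WriteMyOp, ReadMyOp and MYOP rather than anything defined in this patch:

// Hypothetical write/read pair; the real definitions reuse the types
// declared in RISCVSchedule.td (WriteIALU, ReadIALU, and so on).
def WriteMyOp : SchedWrite;
def ReadMyOp  : SchedRead;

// Result write first, then one read per source register operand.
def MYOP : ALU_rr<0b0000000, 0b000, "myop">,
           Sched<[WriteMyOp, ReadMyOp, ReadMyOp]>;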
//===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td index fa0050f107b2..f68767847ade 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td @@ -282,7 +282,8 @@ let Predicates = [HasStdExtC] in { let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [X2] in def C_ADDI4SPN : RVInst16CIW<0b000, 0b00, (outs GPRC:$rd), (ins SP:$rs1, uimm10_lsb00nonzero:$imm), - "c.addi4spn", "$rd, $rs1, $imm"> { + "c.addi4spn", "$rd, $rs1, $imm">, + Sched<[WriteIALU, ReadIALU]> { bits<5> rs1; let Inst{12-11} = imm{5-4}; let Inst{10-7} = imm{9-6}; @@ -291,13 +292,15 @@ def C_ADDI4SPN : RVInst16CIW<0b000, 0b00, (outs GPRC:$rd), } let Predicates = [HasStdExtC, HasStdExtD] in -def C_FLD : CLoad_ri<0b001, "c.fld", FPR64C, uimm8_lsb000> { +def C_FLD : CLoad_ri<0b001, "c.fld", FPR64C, uimm8_lsb000>, + Sched<[WriteFLD64, ReadMemBase]> { bits<8> imm; let Inst{12-10} = imm{5-3}; let Inst{6-5} = imm{7-6}; } -def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00> { +def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00>, + Sched<[WriteLDW, ReadMemBase]> { bits<7> imm; let Inst{12-10} = imm{5-3}; let Inst{6} = imm{2}; @@ -306,7 +309,8 @@ def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00> { let DecoderNamespace = "RISCV32Only_", Predicates = [HasStdExtC, HasStdExtF, IsRV32] in -def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00> { +def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00>, + Sched<[WriteFLD32, ReadMemBase]> { bits<7> imm; let Inst{12-10} = imm{5-3}; let Inst{6} = imm{2}; @@ -314,20 +318,23 @@ def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00> { } let Predicates = [HasStdExtC, IsRV64] in -def C_LD : CLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000> { +def C_LD : CLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000>, + Sched<[WriteLDD, ReadMemBase]> { bits<8> imm; let Inst{12-10} = imm{5-3}; let Inst{6-5} = imm{7-6}; } let Predicates = [HasStdExtC, HasStdExtD] in -def C_FSD : CStore_rri<0b101, "c.fsd", FPR64C, uimm8_lsb000> { +def C_FSD : CStore_rri<0b101, "c.fsd", FPR64C, uimm8_lsb000>, + Sched<[WriteFST64, ReadStoreData, ReadMemBase]> { bits<8> imm; let Inst{12-10} = imm{5-3}; let Inst{6-5} = imm{7-6}; } -def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00> { +def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00>, + Sched<[WriteSTW, ReadStoreData, ReadMemBase]> { bits<7> imm; let Inst{12-10} = imm{5-3}; let Inst{6} = imm{2}; @@ -336,7 +343,8 @@ def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00> { let DecoderNamespace = "RISCV32Only_", Predicates = [HasStdExtC, HasStdExtF, IsRV32] in -def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00> { +def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00>, + Sched<[WriteFST32, ReadStoreData, ReadMemBase]> { bits<7> imm; let Inst{12-10} = imm{5-3}; let Inst{6} = imm{2}; @@ -344,14 +352,16 @@ def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00> { } let Predicates = [HasStdExtC, IsRV64] in -def C_SD : CStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000> { +def C_SD : CStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000>, + Sched<[WriteSTD, ReadStoreData, ReadMemBase]> { bits<8> imm; let Inst{12-10} = imm{5-3}; let Inst{6-5} = imm{7-6}; } let rd = 0, imm = 0, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", ""> +def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", "">, + Sched<[WriteNop]> { let Inst{6-2} = 0; } @@ -359,7 
+369,8 @@ def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", ""> let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb), (ins GPRNoX0:$rd, simm6nonzero:$imm), - "c.addi", "$rd, $imm"> { + "c.addi", "$rd, $imm">, + Sched<[WriteIALU, ReadIALU]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = imm{4-0}; } @@ -367,7 +378,8 @@ def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb), let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_ADDI_NOP : RVInst16CI<0b000, 0b01, (outs GPRX0:$rd_wb), (ins GPRX0:$rd, immzero:$imm), - "c.addi", "$rd, $imm"> { + "c.addi", "$rd, $imm">, + Sched<[WriteIALU, ReadIALU]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = 0; let isAsmParserOnly = 1; @@ -377,27 +389,30 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1, DecoderNamespace = "RISCV32Only_", Defs = [X1], Predicates = [HasStdExtC, IsRV32] in def C_JAL : RVInst16CJ<0b001, 0b01, (outs), (ins simm12_lsb0:$offset), - "c.jal", "$offset">; + "c.jal", "$offset">, Sched<[WriteJal]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Predicates = [HasStdExtC, IsRV64] in def C_ADDIW : RVInst16CI<0b001, 0b01, (outs GPRNoX0:$rd_wb), (ins GPRNoX0:$rd, simm6:$imm), - "c.addiw", "$rd, $imm"> { + "c.addiw", "$rd, $imm">, + Sched<[WriteIALU32, ReadIALU32]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = imm{4-0}; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_LI : RVInst16CI<0b010, 0b01, (outs GPRNoX0:$rd), (ins simm6:$imm), - "c.li", "$rd, $imm"> { + "c.li", "$rd, $imm">, + Sched<[WriteIALU]> { let Inst{6-2} = imm{4-0}; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_ADDI16SP : RVInst16CI<0b011, 0b01, (outs SP:$rd_wb), (ins SP:$rd, simm10_lsb0000nonzero:$imm), - "c.addi16sp", "$rd, $imm"> { + "c.addi16sp", "$rd, $imm">, + Sched<[WriteIALU, ReadIALU]> { let Constraints = "$rd = $rd_wb"; let Inst{12} = imm{9}; let Inst{11-7} = 2; @@ -410,78 +425,93 @@ def C_ADDI16SP : RVInst16CI<0b011, 0b01, (outs SP:$rd_wb), let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_LUI : RVInst16CI<0b011, 0b01, (outs GPRNoX0X2:$rd), (ins c_lui_imm:$imm), - "c.lui", "$rd, $imm"> { + "c.lui", "$rd, $imm">, + Sched<[WriteIALU]> { let Inst{6-2} = imm{4-0}; } -def C_SRLI : Shift_right<0b00, "c.srli", GPRC, uimmlog2xlennonzero>; -def C_SRAI : Shift_right<0b01, "c.srai", GPRC, uimmlog2xlennonzero>; +def C_SRLI : Shift_right<0b00, "c.srli", GPRC, uimmlog2xlennonzero>, + Sched<[WriteShift, ReadShift]>; +def C_SRAI : Shift_right<0b01, "c.srai", GPRC, uimmlog2xlennonzero>, + Sched<[WriteShift, ReadShift]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_ANDI : RVInst16CB<0b100, 0b01, (outs GPRC:$rs1_wb), (ins GPRC:$rs1, simm6:$imm), - "c.andi", "$rs1, $imm"> { + "c.andi", "$rs1, $imm">, + Sched<[WriteIALU, ReadIALU]> { let Constraints = "$rs1 = $rs1_wb"; let Inst{12} = imm{5}; let Inst{11-10} = 0b10; let Inst{6-2} = imm{4-0}; } -def C_SUB : CS_ALU<0b100011, 0b00, "c.sub", GPRC>; -def C_XOR : CS_ALU<0b100011, 0b01, "c.xor", GPRC>; -def C_OR : CS_ALU<0b100011, 0b10, "c.or" , GPRC>; -def C_AND : CS_ALU<0b100011, 0b11, "c.and", GPRC>; +def C_SUB : CS_ALU<0b100011, 0b00, "c.sub", GPRC>, + Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def C_XOR : CS_ALU<0b100011, 0b01, "c.xor", GPRC>, + Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def C_OR : CS_ALU<0b100011, 0b10, "c.or" , GPRC>, + Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def C_AND : CS_ALU<0b100011, 0b11, "c.and", GPRC>, + Sched<[WriteIALU, ReadIALU, ReadIALU]>; let Predicates 
= [HasStdExtC, IsRV64] in { -def C_SUBW : CS_ALU<0b100111, 0b00, "c.subw", GPRC>; -def C_ADDW : CS_ALU<0b100111, 0b01, "c.addw", GPRC>; +def C_SUBW : CS_ALU<0b100111, 0b00, "c.subw", GPRC>, + Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; +def C_ADDW : CS_ALU<0b100111, 0b01, "c.addw", GPRC>, + Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_J : RVInst16CJ<0b101, 0b01, (outs), (ins simm12_lsb0:$offset), - "c.j", "$offset"> { + "c.j", "$offset">, Sched<[WriteJmp]> { let isBranch = 1; let isTerminator=1; let isBarrier=1; } -def C_BEQZ : Bcz<0b110, "c.beqz", seteq, GPRC>; -def C_BNEZ : Bcz<0b111, "c.bnez", setne, GPRC>; +def C_BEQZ : Bcz<0b110, "c.beqz", seteq, GPRC>, Sched<[WriteJmp]>; +def C_BNEZ : Bcz<0b111, "c.bnez", setne, GPRC>, Sched<[WriteJmp]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPRNoX0:$rd_wb), (ins GPRNoX0:$rd, uimmlog2xlennonzero:$imm), - "c.slli" ,"$rd, $imm"> { + "c.slli" ,"$rd, $imm">, + Sched<[WriteShift, ReadShift]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = imm{4-0}; } let Predicates = [HasStdExtC, HasStdExtD] in -def C_FLDSP : CStackLoad<0b001, "c.fldsp", FPR64, uimm9_lsb000> { +def C_FLDSP : CStackLoad<0b001, "c.fldsp", FPR64, uimm9_lsb000>, + Sched<[WriteFLD64, ReadMemBase]> { let Inst{6-5} = imm{4-3}; let Inst{4-2} = imm{8-6}; } -def C_LWSP : CStackLoad<0b010, "c.lwsp", GPRNoX0, uimm8_lsb00> { +def C_LWSP : CStackLoad<0b010, "c.lwsp", GPRNoX0, uimm8_lsb00>, + Sched<[WriteLDW, ReadMemBase]> { let Inst{6-4} = imm{4-2}; let Inst{3-2} = imm{7-6}; } let DecoderNamespace = "RISCV32Only_", Predicates = [HasStdExtC, HasStdExtF, IsRV32] in -def C_FLWSP : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00> { +def C_FLWSP : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00>, + Sched<[WriteFLD32, ReadMemBase]> { let Inst{6-4} = imm{4-2}; let Inst{3-2} = imm{7-6}; } let Predicates = [HasStdExtC, IsRV64] in -def C_LDSP : CStackLoad<0b011, "c.ldsp", GPRNoX0, uimm9_lsb000> { +def C_LDSP : CStackLoad<0b011, "c.ldsp", GPRNoX0, uimm9_lsb000>, + Sched<[WriteLDD, ReadMemBase]> { let Inst{6-5} = imm{4-3}; let Inst{4-2} = imm{8-6}; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_JR : RVInst16CR<0b1000, 0b10, (outs), (ins GPRNoX0:$rs1), - "c.jr", "$rs1"> { + "c.jr", "$rs1">, Sched<[WriteJmpReg]> { let isBranch = 1; let isBarrier = 1; let isTerminator = 1; @@ -491,43 +521,49 @@ def C_JR : RVInst16CR<0b1000, 0b10, (outs), (ins GPRNoX0:$rs1), let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_MV : RVInst16CR<0b1000, 0b10, (outs GPRNoX0:$rs1), (ins GPRNoX0:$rs2), - "c.mv", "$rs1, $rs2">; + "c.mv", "$rs1, $rs2">, + Sched<[WriteIALU, ReadIALU]>; let rs1 = 0, rs2 = 0, hasSideEffects = 1, mayLoad = 0, mayStore = 0 in -def C_EBREAK : RVInst16CR<0b1001, 0b10, (outs), (ins), "c.ebreak", "">; +def C_EBREAK : RVInst16CR<0b1001, 0b10, (outs), (ins), "c.ebreak", "">, Sched<[]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall=1, Defs=[X1], rs2 = 0 in def C_JALR : RVInst16CR<0b1001, 0b10, (outs), (ins GPRNoX0:$rs1), - "c.jalr", "$rs1">; + "c.jalr", "$rs1">, Sched<[WriteJalr, ReadJalr]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_ADD : RVInst16CR<0b1001, 0b10, (outs GPRNoX0:$rs1_wb), (ins GPRNoX0:$rs1, GPRNoX0:$rs2), - "c.add", "$rs1, $rs2"> { + "c.add", "$rs1, $rs2">, + Sched<[WriteIALU, ReadIALU, ReadIALU]> { let Constraints = "$rs1 = $rs1_wb"; } let Predicates = [HasStdExtC, HasStdExtD] in -def C_FSDSP : CStackStore<0b101, "c.fsdsp", FPR64, 
uimm9_lsb000> { +def C_FSDSP : CStackStore<0b101, "c.fsdsp", FPR64, uimm9_lsb000>, + Sched<[WriteFST64, ReadStoreData, ReadMemBase]> { let Inst{12-10} = imm{5-3}; let Inst{9-7} = imm{8-6}; } -def C_SWSP : CStackStore<0b110, "c.swsp", GPR, uimm8_lsb00> { +def C_SWSP : CStackStore<0b110, "c.swsp", GPR, uimm8_lsb00>, + Sched<[WriteSTW, ReadStoreData, ReadMemBase]> { let Inst{12-9} = imm{5-2}; let Inst{8-7} = imm{7-6}; } let DecoderNamespace = "RISCV32Only_", Predicates = [HasStdExtC, HasStdExtF, IsRV32] in -def C_FSWSP : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00> { +def C_FSWSP : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00>, + Sched<[WriteFST32, ReadStoreData, ReadMemBase]> { let Inst{12-9} = imm{5-2}; let Inst{8-7} = imm{7-6}; } let Predicates = [HasStdExtC, IsRV64] in -def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000> { +def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000>, + Sched<[WriteSTD, ReadStoreData, ReadMemBase]> { let Inst{12-10} = imm{5-3}; let Inst{9-7} = imm{8-6}; } @@ -535,7 +571,8 @@ def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000> { // The all zeros pattern isn't a valid RISC-V instruction. It's used by GNU // binutils as 16-bit instruction known to be unimplemented (i.e., trapping). let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in -def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther> { +def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther>, + Sched<[]> { let Inst{15-0} = 0; } @@ -551,7 +588,7 @@ let Predicates = [HasStdExtC, HasRVCHints], hasSideEffects = 0, mayLoad = 0, let rd = 0 in def C_NOP_HINT : RVInst16CI<0b000, 0b01, (outs), (ins simm6nonzero:$imm), - "c.nop", "$imm"> { + "c.nop", "$imm">, Sched<[WriteNop]> { let Inst{6-2} = imm{4-0}; let DecoderMethod = "decodeRVCInstrSImm"; } @@ -559,7 +596,8 @@ def C_NOP_HINT : RVInst16CI<0b000, 0b01, (outs), (ins simm6nonzero:$imm), // Just a different syntax for the c.nop hint: c.addi x0, simm6 vs c.nop simm6. 
def C_ADDI_HINT_X0 : RVInst16CI<0b000, 0b01, (outs GPRX0:$rd_wb), (ins GPRX0:$rd, simm6nonzero:$imm), - "c.addi", "$rd, $imm"> { + "c.addi", "$rd, $imm">, + Sched<[WriteIALU, ReadIALU]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = imm{4-0}; let isAsmParserOnly = 1; @@ -567,14 +605,16 @@ def C_ADDI_HINT_X0 : RVInst16CI<0b000, 0b01, (outs GPRX0:$rd_wb), def C_ADDI_HINT_IMM_ZERO : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb), (ins GPRNoX0:$rd, immzero:$imm), - "c.addi", "$rd, $imm"> { + "c.addi", "$rd, $imm">, + Sched<[WriteIALU, ReadIALU]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = 0; let isAsmParserOnly = 1; } def C_LI_HINT : RVInst16CI<0b010, 0b01, (outs GPRX0:$rd), (ins simm6:$imm), - "c.li", "$rd, $imm"> { + "c.li", "$rd, $imm">, + Sched<[WriteIALU]> { let Inst{6-2} = imm{4-0}; let Inst{11-7} = 0; let DecoderMethod = "decodeRVCInstrRdSImm"; @@ -582,14 +622,15 @@ def C_LI_HINT : RVInst16CI<0b010, 0b01, (outs GPRX0:$rd), (ins simm6:$imm), def C_LUI_HINT : RVInst16CI<0b011, 0b01, (outs GPRX0:$rd), (ins c_lui_imm:$imm), - "c.lui", "$rd, $imm"> { + "c.lui", "$rd, $imm">, + Sched<[WriteIALU]> { let Inst{6-2} = imm{4-0}; let Inst{11-7} = 0; let DecoderMethod = "decodeRVCInstrRdSImm"; } def C_MV_HINT : RVInst16CR<0b1000, 0b10, (outs GPRX0:$rs1), (ins GPRNoX0:$rs2), - "c.mv", "$rs1, $rs2"> + "c.mv", "$rs1, $rs2">, Sched<[WriteIALU, ReadIALU]> { let Inst{11-7} = 0; let DecoderMethod = "decodeRVCInstrRdRs2"; @@ -597,7 +638,8 @@ def C_MV_HINT : RVInst16CR<0b1000, 0b10, (outs GPRX0:$rs1), (ins GPRNoX0:$rs2), def C_ADD_HINT : RVInst16CR<0b1001, 0b10, (outs GPRX0:$rs1_wb), (ins GPRX0:$rs1, GPRNoX0:$rs2), - "c.add", "$rs1, $rs2"> { + "c.add", "$rs1, $rs2">, + Sched<[WriteIALU, ReadIALU, ReadIALU]> { let Constraints = "$rs1 = $rs1_wb"; let Inst{11-7} = 0; let DecoderMethod = "decodeRVCInstrRdRs1Rs2"; @@ -605,7 +647,8 @@ def C_ADD_HINT : RVInst16CR<0b1001, 0b10, (outs GPRX0:$rs1_wb), def C_SLLI_HINT : RVInst16CI<0b000, 0b10, (outs GPRX0:$rd_wb), (ins GPRX0:$rd, uimmlog2xlennonzero:$imm), - "c.slli" ,"$rd, $imm"> { + "c.slli" ,"$rd, $imm">, + Sched<[WriteShift, ReadShift]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = imm{4-0}; let Inst{11-7} = 0; @@ -613,7 +656,8 @@ def C_SLLI_HINT : RVInst16CI<0b000, 0b10, (outs GPRX0:$rd_wb), } def C_SLLI64_HINT : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb), (ins GPR:$rd), - "c.slli64" ,"$rd"> { + "c.slli64" ,"$rd">, + Sched<[WriteShift, ReadShift]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = 0; let Inst{12} = 0; @@ -621,7 +665,8 @@ def C_SLLI64_HINT : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb), (ins GPR:$rd), def C_SRLI64_HINT : RVInst16CI<0b100, 0b01, (outs GPRC:$rd_wb), (ins GPRC:$rd), - "c.srli64", "$rd"> { + "c.srli64", "$rd">, + Sched<[WriteShift, ReadShift]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = 0; let Inst{11-10} = 0; @@ -630,7 +675,8 @@ def C_SRLI64_HINT : RVInst16CI<0b100, 0b01, (outs GPRC:$rd_wb), def C_SRAI64_HINT : RVInst16CI<0b100, 0b01, (outs GPRC:$rd_wb), (ins GPRC:$rd), - "c.srai64", "$rd"> { + "c.srai64", "$rd">, + Sched<[WriteShift, ReadShift]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = 0; let Inst{11-10} = 1; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index b5343e8a8309..4a036eb52bb8 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -42,13 +42,15 @@ class FPFMADDynFrmAlias let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class FPALUD_rr funct7, bits<3> funct3, string opcodestr> : RVInstR; + 
(ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">, + Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class FPALUD_rr_frm funct7, string opcodestr> : RVInstRFrm; + "$rd, $rs1, $rs2, $funct3">, + Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>; class FPALUDDynFrmAlias : InstAlias let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class FPCmpD_rr funct3, string opcodestr> : RVInstR<0b1010001, funct3, OPC_OP_FP, (outs GPR:$rd), - (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">; + (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">, + Sched<[WriteFCmp64, ReadFCmp64, ReadFCmp64]>; //===----------------------------------------------------------------------===// // Instructions @@ -68,7 +71,8 @@ let Predicates = [HasStdExtD] in { let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in def FLD : RVInstI<0b011, OPC_LOAD_FP, (outs FPR64:$rd), (ins GPR:$rs1, simm12:$imm12), - "fld", "$rd, ${imm12}(${rs1})">; + "fld", "$rd, ${imm12}(${rs1})">, + Sched<[WriteFLD64, ReadMemBase]>; // Operands for stores are in the order srcreg, base, offset rather than // reflecting the order these fields are specified in the instruction @@ -76,15 +80,20 @@ def FLD : RVInstI<0b011, OPC_LOAD_FP, (outs FPR64:$rd), let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in def FSD : RVInstS<0b011, OPC_STORE_FP, (outs), (ins FPR64:$rs2, GPR:$rs1, simm12:$imm12), - "fsd", "$rs2, ${imm12}(${rs1})">; + "fsd", "$rs2, ${imm12}(${rs1})">, + Sched<[WriteFST64, ReadStoreData, ReadMemBase]>; -def FMADD_D : FPFMAD_rrr_frm; +def FMADD_D : FPFMAD_rrr_frm, + Sched<[WriteFMulAdd64, ReadFMulAdd64, ReadFMulAdd64, ReadFMulAdd64]>; def : FPFMADDynFrmAlias; -def FMSUB_D : FPFMAD_rrr_frm; +def FMSUB_D : FPFMAD_rrr_frm, + Sched<[WriteFMulSub64, ReadFMulSub64, ReadFMulSub64, ReadFMulSub64]>; def : FPFMADDynFrmAlias; -def FNMSUB_D : FPFMAD_rrr_frm; +def FNMSUB_D : FPFMAD_rrr_frm, + Sched<[WriteFMulSub64, ReadFMulSub64, ReadFMulSub64, ReadFMulSub64]>; def : FPFMADDynFrmAlias; -def FNMADD_D : FPFMAD_rrr_frm; +def FNMADD_D : FPFMAD_rrr_frm, + Sched<[WriteFMulAdd64, ReadFMulAdd64, ReadFMulAdd64, ReadFMulAdd64]>; def : FPFMADDynFrmAlias; def FADD_D : FPALUD_rr_frm<0b0000001, "fadd.d">; @@ -96,7 +105,8 @@ def : FPALUDDynFrmAlias; def FDIV_D : FPALUD_rr_frm<0b0001101, "fdiv.d">; def : FPALUDDynFrmAlias; -def FSQRT_D : FPUnaryOp_r_frm<0b0101101, FPR64, FPR64, "fsqrt.d"> { +def FSQRT_D : FPUnaryOp_r_frm<0b0101101, FPR64, FPR64, "fsqrt.d">, + Sched<[WriteFSqrt32, ReadFSqrt32]> { let rs2 = 0b00000; } def : FPUnaryOpDynFrmAlias; @@ -107,12 +117,14 @@ def FSGNJX_D : FPALUD_rr<0b0010001, 0b010, "fsgnjx.d">; def FMIN_D : FPALUD_rr<0b0010101, 0b000, "fmin.d">; def FMAX_D : FPALUD_rr<0b0010101, 0b001, "fmax.d">; -def FCVT_S_D : FPUnaryOp_r_frm<0b0100000, FPR32, FPR64, "fcvt.s.d"> { +def FCVT_S_D : FPUnaryOp_r_frm<0b0100000, FPR32, FPR64, "fcvt.s.d">, + Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]> { let rs2 = 0b00001; } def : FPUnaryOpDynFrmAlias; -def FCVT_D_S : FPUnaryOp_r<0b0100001, 0b000, FPR64, FPR32, "fcvt.d.s"> { +def FCVT_D_S : FPUnaryOp_r<0b0100001, 0b000, FPR64, FPR32, "fcvt.d.s">, + Sched<[WriteFCvtF32ToF64, ReadFCvtF32ToF64]> { let rs2 = 0b00000; } @@ -120,55 +132,66 @@ def FEQ_D : FPCmpD_rr<0b010, "feq.d">; def FLT_D : FPCmpD_rr<0b001, "flt.d">; def FLE_D : FPCmpD_rr<0b000, "fle.d">; -def FCLASS_D : FPUnaryOp_r<0b1110001, 0b001, GPR, FPR64, "fclass.d"> { +def FCLASS_D : FPUnaryOp_r<0b1110001, 0b001, GPR, FPR64, "fclass.d">, + Sched<[WriteFClass64, ReadFClass64]> { let rs2 = 0b00000; 
} -def FCVT_W_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.w.d"> { +def FCVT_W_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.w.d">, + Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]> { let rs2 = 0b00000; } def : FPUnaryOpDynFrmAlias; -def FCVT_WU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.wu.d"> { +def FCVT_WU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.wu.d">, + Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]> { let rs2 = 0b00001; } def : FPUnaryOpDynFrmAlias; -def FCVT_D_W : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.w"> { +def FCVT_D_W : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.w">, + Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]> { let rs2 = 0b00000; } -def FCVT_D_WU : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.wu"> { +def FCVT_D_WU : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.wu">, + Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]> { let rs2 = 0b00001; } } // Predicates = [HasStdExtD] let Predicates = [HasStdExtD, IsRV64] in { -def FCVT_L_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.l.d"> { +def FCVT_L_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.l.d">, + Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]> { let rs2 = 0b00010; } def : FPUnaryOpDynFrmAlias; -def FCVT_LU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.lu.d"> { +def FCVT_LU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.lu.d">, + Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]> { let rs2 = 0b00011; } def : FPUnaryOpDynFrmAlias; -def FMV_X_D : FPUnaryOp_r<0b1110001, 0b000, GPR, FPR64, "fmv.x.d"> { +def FMV_X_D : FPUnaryOp_r<0b1110001, 0b000, GPR, FPR64, "fmv.x.d">, + Sched<[WriteFMovF64ToI64, ReadFMovF64ToI64]> { let rs2 = 0b00000; } -def FCVT_D_L : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.l"> { +def FCVT_D_L : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.l">, + Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]> { let rs2 = 0b00010; } def : FPUnaryOpDynFrmAlias; -def FCVT_D_LU : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.lu"> { +def FCVT_D_LU : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.lu">, + Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]> { let rs2 = 0b00011; } def : FPUnaryOpDynFrmAlias; -def FMV_D_X : FPUnaryOp_r<0b1111001, 0b000, FPR64, GPR, "fmv.d.x"> { +def FMV_D_X : FPUnaryOp_r<0b1111001, 0b000, FPR64, GPR, "fmv.d.x">, + Sched<[WriteFMovI64ToF64, ReadFMovI64ToF64]> { let rs2 = 0b00000; } } // Predicates = [HasStdExtD, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index 3b73c865ea17..782c3f65af14 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -60,7 +60,8 @@ class FPFMASDynFrmAlias let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class FPALUS_rr funct7, bits<3> funct3, string opcodestr> : RVInstR; + (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">, + Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class FPALUS_rr_frm funct7, string opcodestr> @@ -93,7 +94,8 @@ class FPUnaryOpDynFrmAlias funct3, string opcodestr> : RVInstR<0b1010000, funct3, OPC_OP_FP, (outs GPR:$rd), - (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">; + (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">, + Sched<[WriteFCmp32, ReadFCmp32, ReadFCmp32]>; //===----------------------------------------------------------------------===// // Instructions @@ -103,7 +105,8 @@ let Predicates = [HasStdExtF] in { let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in def FLW : RVInstI<0b010, OPC_LOAD_FP, (outs FPR32:$rd), 
(ins GPR:$rs1, simm12:$imm12), - "flw", "$rd, ${imm12}(${rs1})">; + "flw", "$rd, ${imm12}(${rs1})">, + Sched<[WriteFLD32, ReadMemBase]>; // Operands for stores are in the order srcreg, base, offset rather than // reflecting the order these fields are specified in the instruction @@ -111,27 +114,37 @@ def FLW : RVInstI<0b010, OPC_LOAD_FP, (outs FPR32:$rd), let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in def FSW : RVInstS<0b010, OPC_STORE_FP, (outs), (ins FPR32:$rs2, GPR:$rs1, simm12:$imm12), - "fsw", "$rs2, ${imm12}(${rs1})">; + "fsw", "$rs2, ${imm12}(${rs1})">, + Sched<[WriteFST32, ReadStoreData, ReadMemBase]>; -def FMADD_S : FPFMAS_rrr_frm; +def FMADD_S : FPFMAS_rrr_frm, + Sched<[WriteFMulAdd32, ReadFMulAdd32, ReadFMulAdd32, ReadFMulAdd32]>; def : FPFMASDynFrmAlias; -def FMSUB_S : FPFMAS_rrr_frm; +def FMSUB_S : FPFMAS_rrr_frm, + Sched<[WriteFMulSub32, ReadFMulSub32, ReadFMulSub32, ReadFMulSub32]>; def : FPFMASDynFrmAlias; -def FNMSUB_S : FPFMAS_rrr_frm; +def FNMSUB_S : FPFMAS_rrr_frm, + Sched<[WriteFMulSub32, ReadFMulSub32, ReadFMulSub32, ReadFMulSub32]>; def : FPFMASDynFrmAlias; -def FNMADD_S : FPFMAS_rrr_frm; +def FNMADD_S : FPFMAS_rrr_frm, + Sched<[WriteFMulAdd32, ReadFMulAdd32, ReadFMulAdd32, ReadFMulAdd32]>; def : FPFMASDynFrmAlias; -def FADD_S : FPALUS_rr_frm<0b0000000, "fadd.s">; +def FADD_S : FPALUS_rr_frm<0b0000000, "fadd.s">, + Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>; def : FPALUSDynFrmAlias; -def FSUB_S : FPALUS_rr_frm<0b0000100, "fsub.s">; +def FSUB_S : FPALUS_rr_frm<0b0000100, "fsub.s">, + Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>; def : FPALUSDynFrmAlias; -def FMUL_S : FPALUS_rr_frm<0b0001000, "fmul.s">; +def FMUL_S : FPALUS_rr_frm<0b0001000, "fmul.s">, + Sched<[WriteFMul32, ReadFMul32, ReadFMul32]>; def : FPALUSDynFrmAlias; -def FDIV_S : FPALUS_rr_frm<0b0001100, "fdiv.s">; +def FDIV_S : FPALUS_rr_frm<0b0001100, "fdiv.s">, + Sched<[WriteFDiv32, ReadFDiv32, ReadFDiv32]>; def : FPALUSDynFrmAlias; -def FSQRT_S : FPUnaryOp_r_frm<0b0101100, FPR32, FPR32, "fsqrt.s"> { +def FSQRT_S : FPUnaryOp_r_frm<0b0101100, FPR32, FPR32, "fsqrt.s">, + Sched<[WriteFSqrt32, ReadFSqrt32]> { let rs2 = 0b00000; } def : FPUnaryOpDynFrmAlias; @@ -142,17 +155,20 @@ def FSGNJX_S : FPALUS_rr<0b0010000, 0b010, "fsgnjx.s">; def FMIN_S : FPALUS_rr<0b0010100, 0b000, "fmin.s">; def FMAX_S : FPALUS_rr<0b0010100, 0b001, "fmax.s">; -def FCVT_W_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.w.s"> { +def FCVT_W_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.w.s">, + Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]> { let rs2 = 0b00000; } def : FPUnaryOpDynFrmAlias; -def FCVT_WU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.wu.s"> { +def FCVT_WU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.wu.s">, + Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]> { let rs2 = 0b00001; } def : FPUnaryOpDynFrmAlias; -def FMV_X_W : FPUnaryOp_r<0b1110000, 0b000, GPR, FPR32, "fmv.x.w"> { +def FMV_X_W : FPUnaryOp_r<0b1110000, 0b000, GPR, FPR32, "fmv.x.w">, + Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]> { let rs2 = 0b00000; } @@ -160,42 +176,50 @@ def FEQ_S : FPCmpS_rr<0b010, "feq.s">; def FLT_S : FPCmpS_rr<0b001, "flt.s">; def FLE_S : FPCmpS_rr<0b000, "fle.s">; -def FCLASS_S : FPUnaryOp_r<0b1110000, 0b001, GPR, FPR32, "fclass.s"> { +def FCLASS_S : FPUnaryOp_r<0b1110000, 0b001, GPR, FPR32, "fclass.s">, + Sched<[WriteFClass32, ReadFClass32]> { let rs2 = 0b00000; } -def FCVT_S_W : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.w"> { +def FCVT_S_W : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.w">, + Sched<[WriteFCvtI32ToF32, 
ReadFCvtI32ToF32]> { let rs2 = 0b00000; } def : FPUnaryOpDynFrmAlias; -def FCVT_S_WU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.wu"> { +def FCVT_S_WU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.wu">, + Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]> { let rs2 = 0b00001; } def : FPUnaryOpDynFrmAlias; -def FMV_W_X : FPUnaryOp_r<0b1111000, 0b000, FPR32, GPR, "fmv.w.x"> { +def FMV_W_X : FPUnaryOp_r<0b1111000, 0b000, FPR32, GPR, "fmv.w.x">, + Sched<[WriteFMovI32ToF32, ReadFMovI32ToF32]> { let rs2 = 0b00000; } } // Predicates = [HasStdExtF] let Predicates = [HasStdExtF, IsRV64] in { -def FCVT_L_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.l.s"> { +def FCVT_L_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.l.s">, + Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]> { let rs2 = 0b00010; } def : FPUnaryOpDynFrmAlias; -def FCVT_LU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.lu.s"> { +def FCVT_LU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.lu.s">, + Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]> { let rs2 = 0b00011; } def : FPUnaryOpDynFrmAlias; -def FCVT_S_L : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.l"> { +def FCVT_S_L : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.l">, + Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]> { let rs2 = 0b00010; } def : FPUnaryOpDynFrmAlias; -def FCVT_S_LU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.lu"> { +def FCVT_S_LU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.lu">, + Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]> { let rs2 = 0b00011; } def : FPUnaryOpDynFrmAlias; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td index e75151ba99c7..987534aadd79 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td @@ -24,22 +24,35 @@ def riscv_remuw : SDNode<"RISCVISD::REMUW", SDTIntBinOp>; //===----------------------------------------------------------------------===// let Predicates = [HasStdExtM] in { -def MUL : ALU_rr<0b0000001, 0b000, "mul">; -def MULH : ALU_rr<0b0000001, 0b001, "mulh">; -def MULHSU : ALU_rr<0b0000001, 0b010, "mulhsu">; -def MULHU : ALU_rr<0b0000001, 0b011, "mulhu">; -def DIV : ALU_rr<0b0000001, 0b100, "div">; -def DIVU : ALU_rr<0b0000001, 0b101, "divu">; -def REM : ALU_rr<0b0000001, 0b110, "rem">; -def REMU : ALU_rr<0b0000001, 0b111, "remu">; +def MUL : ALU_rr<0b0000001, 0b000, "mul">, + Sched<[WriteIMul, ReadIMul, ReadIMul]>; +def MULH : ALU_rr<0b0000001, 0b001, "mulh">, + Sched<[WriteIMul, ReadIMul, ReadIMul]>; +def MULHSU : ALU_rr<0b0000001, 0b010, "mulhsu">, + Sched<[WriteIMul, ReadIMul, ReadIMul]>; +def MULHU : ALU_rr<0b0000001, 0b011, "mulhu">, + Sched<[WriteIMul, ReadIMul, ReadIMul]>; +def DIV : ALU_rr<0b0000001, 0b100, "div">, + Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>; +def DIVU : ALU_rr<0b0000001, 0b101, "divu">, + Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>; +def REM : ALU_rr<0b0000001, 0b110, "rem">, + Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>; +def REMU : ALU_rr<0b0000001, 0b111, "remu">, + Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>; } // Predicates = [HasStdExtM] let Predicates = [HasStdExtM, IsRV64] in { -def MULW : ALUW_rr<0b0000001, 0b000, "mulw">; -def DIVW : ALUW_rr<0b0000001, 0b100, "divw">; -def DIVUW : ALUW_rr<0b0000001, 0b101, "divuw">; -def REMW : ALUW_rr<0b0000001, 0b110, "remw">; -def REMUW : ALUW_rr<0b0000001, 0b111, "remuw">; +def MULW : ALUW_rr<0b0000001, 0b000, "mulw">, + Sched<[WriteIMul32, ReadIMul32, ReadIMul32]>; +def DIVW : ALUW_rr<0b0000001, 0b100, "divw">, + Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>; +def DIVUW : 
ALUW_rr<0b0000001, 0b101, "divuw">, + Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>; +def REMW : ALUW_rr<0b0000001, 0b110, "remw">, + Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>; +def REMUW : ALUW_rr<0b0000001, 0b111, "remuw">, + Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>; } // Predicates = [HasStdExtM, IsRV64] //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket32.td b/llvm/lib/Target/RISCV/RISCVSchedRocket32.td new file mode 100644 index 000000000000..8a91a70b61c7 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVSchedRocket32.td @@ -0,0 +1,213 @@ +//==- RISCVSchedRocket32.td - Rocket Scheduling Definitions -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===---------------------------------------------------------------------===// +// The following definitions describe the simpler per-operand machine model. +// This works with MachineScheduler. See MCSchedule.h for details. + +// Rocket machine model for scheduling and other instruction cost heuristics. +def Rocket32Model : SchedMachineModel { + let MicroOpBufferSize = 0; // Explicitly set to zero since Rocket is in-order. + let IssueWidth = 1; // 1 micro-op is dispatched per cycle. + let LoadLatency = 3; + let MispredictPenalty = 3; + let CompleteModel = 1; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available. + +// Model each pipeline as a ProcResource with BufferSize = 0, since +// Rocket is in-order. + +let BufferSize = 0 in { +def Rocket32UnitALU : ProcResource<1>; // Int ALU +def Rocket32UnitIMul : ProcResource<1>; // Int Multiply +def Rocket32UnitMem : ProcResource<1>; // Load/Store +def Rocket32UnitB : ProcResource<1>; // Branch + +def Rocket32UnitFPALU : ProcResource<1>; // FP ALU +} + +let BufferSize = 1 in { +def Rocket32UnitIDiv : ProcResource<1>; // Int Division +def Rocket32UnitFPDivSqrt : ProcResource<1>; // FP Divide/Sqrt +} + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedWrite types which both map the ProcResources and +// set the latency.
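Inside the model body that follows, each def : WriteRes binds one of the SchedWrite types to the processor resources declared above and fixes its latency. A representative sketch using the standard WriteRes<write, resource-list> form, shown here for the 34-cycle divider described below (an illustration of the mechanism, not a verbatim quote of the file):

// A divide occupies the unpipelined Rocket32UnitIDiv for all 34 cycles,
// and its result is available 34 cycles after issue.
def : WriteRes<WriteIDiv, [Rocket32UnitIDiv]> {
  let Latency = 34;
  let ResourceCycles = [34];
}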
+ +let SchedModel = Rocket32Model in { + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; + +// Multiplies on Rocket differ by implementation; placeholder until +// we can determine how to read from the command line +def : WriteRes { let Latency = 4; } + +// 32-bit divides have a worst-case latency of 34 cycles +def : WriteRes { + let Latency = 34; + let ResourceCycles = [34]; +} + +// Memory +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +let Latency = 3 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 2 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +} + +def : WriteRes; + +// Most FP single precision operations are 4 cycles +def : WriteRes { let Latency = 4; } + +// Most FP double precision operations are 6 cycles +def : WriteRes { let Latency = 6; } + +let Latency = 2 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 5 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 7 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +// FP Divide unit on Rocket is not pipelined, so set resource cycles to latency +let Latency = 20, ResourceCycles = [20] in { +def : WriteRes; +def : WriteRes; +} + +// FP Sqrt unit on Rocket is not pipelined, so set resource cycles to latency +def : WriteRes { let Latency = 20; + let ResourceCycles = [20];} +def : WriteRes { let Latency = 25; + let ResourceCycles = [25];} + +def : WriteRes; + +def : InstRW<[WriteIALU], (instrs COPY)>; + +let Unsupported = 1 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedRead types with cycles. +// Dummy definitions for RocketCore.
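Each ReadAdvance that follows pairs one SchedRead type with the number of cycles by which a consumer may read that operand early; since no forwarding is modeled for Rocket, every advance is zero. The general ReadAdvance<read, cycles> form, sketched with ReadIALU (one of the SchedRead types referenced by the instruction definitions earlier in this patch):

// No bypass network is modeled, so operands are read 0 cycles early.
def : ReadAdvance<ReadIALU, 0>;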
+def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +} diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket64.td b/llvm/lib/Target/RISCV/RISCVSchedRocket64.td new file mode 100644 index 000000000000..79e79f90f2f0 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVSchedRocket64.td @@ -0,0 +1,214 @@ +//==- RISCVSchedRocket64.td - Rocket Scheduling Definitions -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// ===---------------------------------------------------------------------===// +// The following definitions describe the simpler per-operand machine model. +// This works with MachineScheduler. See MCSchedule.h for details. + +// Rocket machine model for scheduling and other instruction cost heuristics. +def Rocket64Model : SchedMachineModel { + let MicroOpBufferSize = 0; // Explicitly set to zero since Rocket is in-order. + let IssueWidth = 1; // 1 micro-ops are dispatched per cycle. + let LoadLatency = 3; + let MispredictPenalty = 3; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available. + +// Modeling each pipeline as a ProcResource using the BufferSize = 0 since +// Rocket is in-order. + +let BufferSize = 0 in { +def Rocket64UnitALU : ProcResource<1>; // Int ALU +def Rocket64UnitIMul : ProcResource<1>; // Int Multiply +def Rocket64UnitMem : ProcResource<1>; // Load/Store +def Rocket64UnitB : ProcResource<1>; // Branch + +def Rocket64UnitFPALU : ProcResource<1>; // FP ALU +} + +let BufferSize = 1 in { +def Rocket64UnitIDiv : ProcResource<1>; // Int Division +def Rocket64UnitFPDivSqrt : ProcResource<1>; // FP Divide/Sqrt +} + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedWrite types which both map the ProcResources and +// set the latency. + +let SchedModel = Rocket64Model in { + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +let Latency = 4 in { +def : WriteRes; +def : WriteRes; +} + +// Integer divide varies based on operand magnitude and sign; worse case latency is 34. 
+def : WriteRes { + let Latency = 34; + let ResourceCycles = [34]; +} +def : WriteRes { + let Latency = 33; + let ResourceCycles = [33]; +} + +// Memory +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +let Latency = 3 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 2 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +} + +def : WriteRes; +def : WriteRes; + +// Most FP single precision operations are 4 cycles +def : WriteRes { let Latency = 4; } + +// Most FP double precision operations are 6 cycles +def : WriteRes { let Latency = 6; } + +// Conversion instructions +let Latency = 2 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 5 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 7 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +// FP Divide unit on Rocket is not pipelined, so set resource cycles to latency +let Latency = 20, ResourceCycles = [20] in { +def : WriteRes; +def : WriteRes; +} + +// FP Sqrt unit on Rocket is not pipelined, so set resource cycles to latency +def : WriteRes { let Latency = 20; + let ResourceCycles = [20]; } +def : WriteRes { let Latency = 25; + let ResourceCycles = [25]; } + +def : WriteRes; + +def : InstRW<[WriteIALU], (instrs COPY)>; + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedRead types with cycles. +// Dummy definitions for RocketCore. +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +} diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td new file mode 100644 index 000000000000..9e2762a5d171 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVSchedule.td @@ -0,0 +1,138 @@ +//===-- RISCVSchedule.td - RISCV Scheduling Definitions -------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+/// Define scheduler resources associated with def operands.
+def WriteIALU : SchedWrite; // 32 or 64-bit integer ALU operations
+def WriteIALU32 : SchedWrite; // 32-bit integer ALU operations on RV64I
+def WriteShift32 : SchedWrite; // 32-bit shift operations on RV64Ix
+def WriteShift : SchedWrite; // 32 or 64-bit shift operations
+def WriteIDiv : SchedWrite; // 32-bit or 64-bit divide and remainder
+def WriteIDiv32 : SchedWrite; // 32-bit divide and remainder on RV64I
+def WriteIMul : SchedWrite; // 32-bit or 64-bit multiply
+def WriteIMul32 : SchedWrite; // 32-bit multiply on RV64I
+def WriteJmp : SchedWrite; // Jump
+def WriteJal : SchedWrite; // Jump and link
+def WriteJalr : SchedWrite; // Jump and link register
+def WriteJmpReg : SchedWrite; // Jump register
+def WriteNop : SchedWrite;
+def WriteLDB : SchedWrite; // Load byte
+def WriteLDH : SchedWrite; // Load half-word
+def WriteLDW : SchedWrite; // Load word
+def WriteLDWU : SchedWrite; // Load word unsigned
+def WriteLDD : SchedWrite; // Load double-word
+def WriteCSR : SchedWrite; // CSR instructions
+def WriteSTB : SchedWrite; // Store byte
+def WriteSTH : SchedWrite; // Store half-word
+def WriteSTW : SchedWrite; // Store word
+def WriteSTD : SchedWrite; // Store double-word
+def WriteAtomicW : SchedWrite; // Atomic memory operation, word size
+def WriteAtomicD : SchedWrite; // Atomic memory operation, double-word size
+def WriteAtomicLDW : SchedWrite; // Atomic load word
+def WriteAtomicLDD : SchedWrite; // Atomic load double word
+def WriteAtomicSTW : SchedWrite; // Atomic store word
+def WriteAtomicSTD : SchedWrite; // Atomic store double word
+def WriteFALU32 : SchedWrite; // FP 32-bit computation
+def WriteFALU64 : SchedWrite; // FP 64-bit computation
+def WriteFMul32 : SchedWrite; // 32-bit floating point multiply
+def WriteFMulAdd32 : SchedWrite; // 32-bit floating point multiply add
+def WriteFMulSub32 : SchedWrite; // 32-bit floating point multiply sub
+def WriteFMul64 : SchedWrite; // 64-bit floating point multiply
+def WriteFMulAdd64 : SchedWrite; // 64-bit floating point multiply add
+def WriteFMulSub64 : SchedWrite; // 64-bit floating point multiply sub
+def WriteFDiv32 : SchedWrite; // 32-bit floating point divide
+def WriteFDiv64 : SchedWrite; // 64-bit floating point divide
+def WriteFSqrt32 : SchedWrite; // 32-bit floating point sqrt
+def WriteFSqrt64 : SchedWrite; // 64-bit floating point sqrt
+
+// Integer to float conversions
+def WriteFCvtI32ToF32 : SchedWrite;
+def WriteFCvtI32ToF64 : SchedWrite;
+def WriteFCvtI64ToF32 : SchedWrite; // RV64I only
+def WriteFCvtI64ToF64 : SchedWrite; // RV64I only
+
+// Float to integer conversions
+def WriteFCvtF32ToI32 : SchedWrite;
+def WriteFCvtF32ToI64 : SchedWrite; // RV64I only
+def WriteFCvtF64ToI32 : SchedWrite;
+def WriteFCvtF64ToI64 : SchedWrite; // RV64I only
+
+// Float to float conversions
+def WriteFCvtF32ToF64 : SchedWrite;
+def WriteFCvtF64ToF32 : SchedWrite;
+
+def WriteFConv32 : SchedWrite; // 32-bit floating point convert
+def WriteFConv64 : SchedWrite; // 64-bit floating point convert
+def WriteFClass32 : SchedWrite; // 32-bit floating point classify
+def WriteFClass64 : SchedWrite; // 64-bit floating point classify
+def WriteFCmp32 : SchedWrite; // 32-bit floating point compare
+def WriteFCmp64 : SchedWrite; // 64-bit floating point compare
+
+def WriteFMovF32ToI32 : SchedWrite;
+def WriteFMovI32ToF32 :
SchedWrite; +def WriteFMovF64ToI64 : SchedWrite; // RV64I only +def WriteFMovI64ToF64 : SchedWrite; // RV64I only + +def WriteFMov32 : SchedWrite; // 32-bit floating point move +def WriteFMov64 : SchedWrite; // 64-bit floating point move +def WriteFLD32 : SchedWrite; // Floating point sp load +def WriteFLD64 : SchedWrite; // Floating point dp load +def WriteFST32 : SchedWrite; // Floating point sp store +def WriteFST64 : SchedWrite; // Floating point dp store + +/// Define scheduler resources associated with use operands. +def ReadJmp : SchedRead; +def ReadJalr : SchedRead; +def ReadCSR : SchedRead; +def ReadMemBase : SchedRead; +def ReadStoreData : SchedRead; +def ReadIALU : SchedRead; +def ReadIALU32 : SchedRead; // 32-bit integer ALU operations on RV64I +def ReadShift : SchedRead; +def ReadShift32 : SchedRead; // 32-bit shift operations on RV64Ix +def ReadIDiv : SchedRead; +def ReadIDiv32 : SchedRead; +def ReadIMul : SchedRead; +def ReadIMul32 : SchedRead; +def ReadAtomicWA : SchedRead; +def ReadAtomicWD : SchedRead; +def ReadAtomicDA : SchedRead; +def ReadAtomicDD : SchedRead; +def ReadAtomicLDW : SchedRead; // Atomic load word +def ReadAtomicLDD : SchedRead; // Atomic load double word +def ReadAtomicSTW : SchedRead; // Atomic store word +def ReadAtomicSTD : SchedRead; // Atomic store double word +def ReadFALU32 : SchedRead; // FP 32-bit computation +def ReadFALU64 : SchedRead; // FP 64-bit computation +def ReadFMul32 : SchedRead; // 32-bit floating point multiply +def ReadFMulAdd32 : SchedRead; // 32-bit floating point multiply add +def ReadFMulSub32 : SchedRead; // 32-bit floating point multiply sub +def ReadFMul64 : SchedRead; // 64-bit floating point multiply +def ReadFMulAdd64 : SchedRead; // 64-bit floating point multiply add +def ReadFMulSub64 : SchedRead; // 64-bit floating point multiply sub +def ReadFDiv32 : SchedRead; // 32-bit floating point divide +def ReadFDiv64 : SchedRead; // 64-bit floating point divide +def ReadFSqrt32 : SchedRead; // 32-bit floating point sqrt +def ReadFSqrt64 : SchedRead; // 64-bit floating point sqrt +def ReadFCmp32 : SchedRead; +def ReadFCmp64 : SchedRead; +def ReadFCvtF32ToI32 : SchedRead; +def ReadFCvtF32ToI64 : SchedRead; +def ReadFCvtF64ToI32 : SchedRead; +def ReadFCvtF64ToI64 : SchedRead; +def ReadFCvtI32ToF32 : SchedRead; +def ReadFCvtI32ToF64 : SchedRead; +def ReadFCvtI64ToF32 : SchedRead; +def ReadFCvtI64ToF64 : SchedRead; +def ReadFMovF32ToI32 : SchedRead; +def ReadFMovI32ToF32 : SchedRead; +def ReadFMovF64ToI64 : SchedRead; +def ReadFMovI64ToF64 : SchedRead; +def ReadFCvtF32ToF64 : SchedRead; +def ReadFCvtF64ToF32 : SchedRead; +def ReadFClass32 : SchedRead; +def ReadFClass64 : SchedRead; diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 2bb26988c7da..de71c01753de 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -15,6 +15,7 @@ #include "RISCVTargetObjectFile.h" #include "RISCVTargetTransformInfo.h" #include "TargetInfo/RISCVTargetInfo.h" +#include "Utils/RISCVBaseInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" @@ -89,8 +90,17 @@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. 
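+  // A hedged illustration, not part of this patch: the "target-abi" module
+  // flag read in the hunk below is plain IR module metadata, e.g. as emitted
+  // by the frontend:
+  //   !llvm.module.flags = !{!0}
+  //   !0 = !{i32 1, !"target-abi", !"ilp32d"}
+  // When the -target-abi option is absent or agrees with the flag, the flag's
+  // value is used; a disagreement is reported as a fatal error.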
resetTargetOptions(F); - I = std::make_unique(TargetTriple, CPU, FS, - Options.MCOptions.getABIName(), *this); + auto ABIName = Options.MCOptions.getABIName(); + if (const MDString *ModuleTargetABI = dyn_cast_or_null( + F.getParent()->getModuleFlag("target-abi"))) { + auto TargetABI = RISCVABI::getTargetABI(ABIName); + if (TargetABI != RISCVABI::ABI_Unknown && + ModuleTargetABI->getString() != ABIName) { + report_fatal_error("-target-abi option != target-abi module flag"); + } + ABIName = ModuleTargetABI->getString(); + } + I = std::make_unique(TargetTriple, CPU, FS, ABIName, *this); } return I.get(); } diff --git a/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp index 432ebb294d46..43b1f8b80c5f 100644 --- a/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp +++ b/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp @@ -12,16 +12,7 @@ namespace RISCVSysReg { namespace RISCVABI { ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits, StringRef ABIName) { - auto TargetABI = StringSwitch(ABIName) - .Case("ilp32", ABI_ILP32) - .Case("ilp32f", ABI_ILP32F) - .Case("ilp32d", ABI_ILP32D) - .Case("ilp32e", ABI_ILP32E) - .Case("lp64", ABI_LP64) - .Case("lp64f", ABI_LP64F) - .Case("lp64d", ABI_LP64D) - .Default(ABI_Unknown); - + auto TargetABI = getTargetABI(ABIName); bool IsRV64 = TT.isArch64Bit(); bool IsRV32E = FeatureBits[RISCV::FeatureRV32E]; @@ -58,6 +49,19 @@ ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits, return ABI_ILP32; } +ABI getTargetABI(StringRef ABIName) { + auto TargetABI = StringSwitch(ABIName) + .Case("ilp32", ABI_ILP32) + .Case("ilp32f", ABI_ILP32F) + .Case("ilp32d", ABI_ILP32D) + .Case("ilp32e", ABI_ILP32E) + .Case("lp64", ABI_LP64) + .Case("lp64f", ABI_LP64F) + .Case("lp64d", ABI_LP64D) + .Default(ABI_Unknown); + return TargetABI; +} + // To avoid the BP value clobbered by a function call, we need to choose a // callee saved register to save the value. RV32E only has X8 and X9 as callee // saved registers and X8 will be used as fp. So we choose X9 as bp. diff --git a/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h index cf078df9609a..d36c528bba1e 100644 --- a/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h @@ -202,6 +202,8 @@ enum ABI { ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits, StringRef ABIName); +ABI getTargetABI(StringRef ABIName); + // Returns the register used to hold the stack pointer after realignment. 
 Register getBPReg();
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index c73905d3357a..ab00069497af 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -6859,8 +6859,6 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
   for (MachineBasicBlock::iterator NextMIIt =
          std::next(MachineBasicBlock::iterator(MI));
        NextMIIt != MBB->end(); ++NextMIIt) {
-    if (NextMIIt->definesRegister(SystemZ::CC))
-      break;
     if (isSelectPseudo(*NextMIIt)) {
       assert(NextMIIt->getOperand(3).getImm() == CCValid &&
              "Bad CCValid operands since CC was not redefined.");
@@ -6871,6 +6869,9 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
       }
       break;
     }
+    if (NextMIIt->definesRegister(SystemZ::CC) ||
+        NextMIIt->usesCustomInsertionHook())
+      break;
     bool User = false;
     for (auto SelMI : Selects)
       if (NextMIIt->readsVirtualRegister(SelMI->getOperand(0).getReg())) {
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index d1f3acbd221e..3e905c18fa3b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -751,6 +751,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
     auto *II = dyn_cast(BB.getTerminator());
     if (!II)
       continue;
+    Changed = true;
     LandingPads.insert(II->getLandingPadInst());
     IRB.SetInsertPoint(II);
@@ -791,6 +792,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
     auto *RI = dyn_cast(&I);
     if (!RI)
       continue;
+    Changed = true;
     // Split the input into legal values
     Value *Input = RI->getValue();
@@ -815,6 +817,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
       continue;
     if (Callee->getIntrinsicID() != Intrinsic::eh_typeid_for)
       continue;
+    Changed = true;
     IRB.SetInsertPoint(CI);
     CallInst *NewCI =
@@ -830,7 +833,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
     if (auto *LPI = dyn_cast(I))
       LandingPads.insert(LPI);
   }
-  Changed = !LandingPads.empty();
+  Changed |= !LandingPads.empty();
   // Handle all the landingpads for this function together, as multiple invokes
   // may share a single landing pad
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index dffda5217675..2284cd7a70b8 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -85,13 +85,13 @@ cl::opt X86AlignBranchBoundary(
 cl::opt> X86AlignBranch(
     "x86-align-branch",
-    cl::desc("Specify types of branches to align (plus separated list of "
-             "types). The branches's types are combination of jcc, fused, "
-             "jmp, call, ret, indirect."),
-    cl::value_desc("jcc indicates conditional jumps, fused indicates fused "
-                   "conditional jumps, jmp indicates unconditional jumps, call "
-                   "indicates direct and indirect calls, ret indicates rets, "
-                   "indirect indicates indirect jumps."),
+    cl::desc(
+        "Specify types of branches to align. The branch types are a "
+        "combination of jcc, fused, jmp, call, ret, indirect. 
jcc indicates " + "conditional jumps, fused indicates fused conditional jumps, jmp " + "indicates unconditional jumps, call indicates direct and indirect " + "calls, ret indicates rets, indirect indicates indirect jumps."), + cl::value_desc("(plus separated list of types)"), cl::location(X86AlignBranchKindLoc)); cl::opt X86AlignBranchWithin32BBoundaries( diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0f152968ddfd..cbdd7135de43 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -21056,7 +21056,7 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, // Divide by pow2. SDValue SRA = - DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i64)); + DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i8)); // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 2fc9a2af01d7..7f49c6e861d4 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -2002,6 +2002,25 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { break; } + case X86::ENDBR32: + case X86::ENDBR64: { + // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for + // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be + // non-empty. If MI is the initial ENDBR, place the + // __patchable_function_entries label after ENDBR. + if (CurrentPatchableFunctionEntrySym && + CurrentPatchableFunctionEntrySym == CurrentFnBegin && + MI == &MF->front().front()) { + MCInst Inst; + MCInstLowering.Lower(MI, Inst); + EmitAndCountInstruction(Inst); + CurrentPatchableFunctionEntrySym = createTempSymbol("patch"); + OutStreamer->EmitLabel(CurrentPatchableFunctionEntrySym); + return; + } + break; + } + case X86::TAILJMPr: case X86::TAILJMPm: case X86::TAILJMPd: diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 9c992830879a..7cfc29f7bf7a 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -13,6 +13,7 @@ #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm-c/Transforms/PassManagerBuilder.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CFLAndersAliasAnalysis.h" @@ -187,8 +188,13 @@ PassManagerBuilder::~PassManagerBuilder() { } /// Set of global extensions, automatically added as part of the standard set. -static ManagedStatic, 8> > GlobalExtensions; +static ManagedStatic< + SmallVector, + 8>> + GlobalExtensions; +static PassManagerBuilder::GlobalExtensionID GlobalExtensionsCounter; /// Check if GlobalExtensions is constructed and not empty. 
/// Since GlobalExtensions is a managed static, calling 'empty()' will trigger @@ -197,10 +203,29 @@ static bool GlobalExtensionsNotEmpty() { return GlobalExtensions.isConstructed() && !GlobalExtensions->empty(); } -void PassManagerBuilder::addGlobalExtension( - PassManagerBuilder::ExtensionPointTy Ty, - PassManagerBuilder::ExtensionFn Fn) { - GlobalExtensions->push_back(std::make_pair(Ty, std::move(Fn))); +PassManagerBuilder::GlobalExtensionID +PassManagerBuilder::addGlobalExtension(PassManagerBuilder::ExtensionPointTy Ty, + PassManagerBuilder::ExtensionFn Fn) { + auto ExtensionID = GlobalExtensionsCounter++; + GlobalExtensions->push_back(std::make_tuple(Ty, std::move(Fn), ExtensionID)); + return ExtensionID; +} + +void PassManagerBuilder::removeGlobalExtension( + PassManagerBuilder::GlobalExtensionID ExtensionID) { + // RegisterStandardPasses may try to call this function after GlobalExtensions + // has already been destroyed; doing so should not generate an error. + if (!GlobalExtensions.isConstructed()) + return; + + auto GlobalExtension = + llvm::find_if(*GlobalExtensions, [ExtensionID](const auto &elem) { + return std::get<2>(elem) == ExtensionID; + }); + assert(GlobalExtension != GlobalExtensions->end() && + "The extension ID to be removed should always be valid."); + + GlobalExtensions->erase(GlobalExtension); } void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) { @@ -211,8 +236,8 @@ void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy, legacy::PassManagerBase &PM) const { if (GlobalExtensionsNotEmpty()) { for (auto &Ext : *GlobalExtensions) { - if (Ext.first == ETy) - Ext.second(*this, PM); + if (std::get<0>(Ext) == ETy) + std::get<1>(Ext)(*this, PM); } } for (unsigned i = 0, e = Extensions.size(); i != e; ++i) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index c288a7d8d403..74654f7ef51d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -1336,6 +1336,11 @@ static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC, if (!isMinMaxWithLoads(LoadAddr, CmpLoadTy)) return false; + // Make sure the type would actually change. + // This condition can be hit with chains of bitcasts. + if (LI->getType() == CmpLoadTy) + return false; + // Make sure we're not changing the size of the load/store. const auto &DL = IC.getDataLayout(); if (DL.getTypeStoreSizeInBits(LI->getType()) != diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 05a624fde86b..49645e9460cd 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1013,6 +1013,12 @@ canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp, Cmp.getPredicate() == CanonicalPred) return nullptr; + // Bail out on unsimplified X-0 operand (due to some worklist management bug), + // as this may cause an infinite combine loop. Let the sub be folded first. + if (match(LHS, m_Sub(m_Value(), m_Zero())) || + match(RHS, m_Sub(m_Value(), m_Zero()))) + return nullptr; + // Create the canonical compare and plug it into the select. 
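+  // A hedged illustration, not part of this patch: for smax(X, 42) the
+  // canonical form produced here is
+  //   %cmp = icmp sgt i32 %x, 42
+  //   %max = select i1 %cmp, i32 %x, i32 42
+  // i.e. the constant on the RHS with the predicate matching the min/max
+  // flavor, which later folds can rely on.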
Sel.setCondition(Builder.CreateICmp(CanonicalPred, LHS, RHS)); diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 801c09a317a7..bf32996d96e2 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3568,7 +3568,8 @@ static bool combineInstructionsOverFunction( ProfileSummaryInfo *PSI, bool ExpensiveCombines, unsigned MaxIterations, LoopInfo *LI) { auto &DL = F.getParent()->getDataLayout(); - ExpensiveCombines |= EnableExpensiveCombines; + if (EnableExpensiveCombines.getNumOccurrences()) + ExpensiveCombines = EnableExpensiveCombines; MaxIterations = std::min(MaxIterations, LimitMaxIterations.getValue()); /// Builder - This is an IRBuilder that automatically inserts new diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 80acab307578..f581142df8f7 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3005,6 +3005,43 @@ struct MemorySanitizerVisitor : public InstVisitor { setOriginForNaryOp(I); } + Constant *getPclmulMask(IRBuilder<> &IRB, unsigned Width, bool OddElements) { + SmallVector Mask; + for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) { + Constant *C = ConstantInt::get(IRB.getInt32Ty(), X); + Mask.push_back(C); + Mask.push_back(C); + } + return ConstantVector::get(Mask); + } + + // Instrument pclmul intrinsics. + // These intrinsics operate either on odd or on even elements of the input + // vectors, depending on the constant in the 3rd argument, ignoring the rest. + // Replace the unused elements with copies of the used ones, ex: + // (0, 1, 2, 3) -> (0, 0, 2, 2) (even case) + // or + // (0, 1, 2, 3) -> (1, 1, 3, 3) (odd case) + // and then apply the usual shadow combining logic. + void handlePclmulIntrinsic(IntrinsicInst &I) { + IRBuilder<> IRB(&I); + Type *ShadowTy = getShadowTy(&I); + unsigned Width = I.getArgOperand(0)->getType()->getVectorNumElements(); + assert(isa(I.getArgOperand(2)) && + "pclmul 3rd operand must be a constant"); + unsigned Imm = cast(I.getArgOperand(2))->getZExtValue(); + Value *Shuf0 = + IRB.CreateShuffleVector(getShadow(&I, 0), UndefValue::get(ShadowTy), + getPclmulMask(IRB, Width, Imm & 0x01)); + Value *Shuf1 = + IRB.CreateShuffleVector(getShadow(&I, 1), UndefValue::get(ShadowTy), + getPclmulMask(IRB, Width, Imm & 0x10)); + ShadowAndOriginCombiner SOC(this, IRB); + SOC.Add(Shuf0, getOrigin(&I, 0)); + SOC.Add(Shuf1, getOrigin(&I, 1)); + SOC.Done(&I); + } + void visitIntrinsicInst(IntrinsicInst &I) { switch (I.getIntrinsicID()) { case Intrinsic::lifetime_start: @@ -3238,6 +3275,12 @@ struct MemorySanitizerVisitor : public InstVisitor { handleBmiIntrinsic(I); break; + case Intrinsic::x86_pclmulqdq: + case Intrinsic::x86_pclmulqdq_256: + case Intrinsic::x86_pclmulqdq_512: + handlePclmulIntrinsic(I); + break; + case Intrinsic::is_constant: // The result of llvm.is.constant() is always defined. 
setShadow(&I, getCleanShadow(&I)); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index c3ca43fcd492..e5edd305d3d5 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -279,9 +279,10 @@ class LoopVectorizationPlanner { /// Build a VPlan using VPRecipes according to the information gather by /// Legal. This method is only used for the legacy inner loop vectorizer. - VPlanPtr - buildVPlanWithVPRecipes(VFRange &Range, SmallPtrSetImpl &NeedDef, - SmallPtrSetImpl &DeadInstructions); + VPlanPtr buildVPlanWithVPRecipes( + VFRange &Range, SmallPtrSetImpl &NeedDef, + SmallPtrSetImpl &DeadInstructions, + const DenseMap &SinkAfter); /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, /// according to the information gathered by Legal when it checked if it is diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 684a3098e564..ebfd5fe8b762 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6716,7 +6716,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst, BranchInst *BI = dyn_cast(Src->getTerminator()); assert(BI && "Unexpected terminator found"); - if (!BI->isConditional()) + if (!BI->isConditional() || BI->getSuccessor(0) == BI->getSuccessor(1)) return EdgeMaskCache[Edge] = SrcMask; VPValue *EdgeMask = Plan->getVPValue(BI->getCondition()); @@ -7118,25 +7118,29 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF, SmallPtrSet DeadInstructions; collectTriviallyDeadInstructions(DeadInstructions); + DenseMap &SinkAfter = Legal->getSinkAfter(); + // Dead instructions do not need sinking. Remove them from SinkAfter. + for (Instruction *I : DeadInstructions) + SinkAfter.erase(I); + for (unsigned VF = MinVF; VF < MaxVF + 1;) { VFRange SubRange = {VF, MaxVF + 1}; - VPlans.push_back( - buildVPlanWithVPRecipes(SubRange, NeedDef, DeadInstructions)); + VPlans.push_back(buildVPlanWithVPRecipes(SubRange, NeedDef, + DeadInstructions, SinkAfter)); VF = SubRange.End; } } VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VFRange &Range, SmallPtrSetImpl &NeedDef, - SmallPtrSetImpl &DeadInstructions) { + SmallPtrSetImpl &DeadInstructions, + const DenseMap &SinkAfter) { // Hold a mapping from predicated instructions to their recipes, in order to // fix their AlsoPack behavior if a user is determined to replicate and use a // scalar instead of vector value. 
DenseMap PredInst2Recipe; - DenseMap &SinkAfter = Legal->getSinkAfter(); - SmallPtrSet *, 1> InterleaveGroups; VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder); diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp index bfe7e8f04303..0efd0df2c12b 100644 --- a/llvm/tools/lli/lli.cpp +++ b/llvm/tools/lli/lli.cpp @@ -197,6 +197,11 @@ namespace { cl::desc("Generate software floating point library calls"), cl::init(false)); + cl::opt NoProcessSymbols( + "no-process-syms", + cl::desc("Do not resolve lli process symbols in JIT'd code"), + cl::init(false)); + enum class DumpKind { NoDump, DumpFuncsToStdOut, @@ -794,12 +799,16 @@ int runOrcLazyJIT(const char *ProgName) { }); orc::MangleAndInterner Mangle(J->getExecutionSession(), J->getDataLayout()); - J->getMainJITDylib().addGenerator( - ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( - J->getDataLayout().getGlobalPrefix(), - [MainName = Mangle("main")](const orc::SymbolStringPtr &Name) { - return Name != MainName; - }))); + + // Unless they've been explicitly disabled, make process symbols available to + // JIT'd code. + if (!NoProcessSymbols) + J->getMainJITDylib().addGenerator( + ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( + J->getDataLayout().getGlobalPrefix(), + [MainName = Mangle("main")](const orc::SymbolStringPtr &Name) { + return Name != MainName; + }))); orc::LocalCXXRuntimeOverrides CXXRuntimeOverrides; ExitOnErr(CXXRuntimeOverrides.enable(J->getMainJITDylib(), Mangle));
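+// A hedged usage sketch, not part of this patch: pairing the new option with
+// lli's lazy JIT isolates JIT'd code from the host process's symbols, e.g.
+//   lli -jit-kind=orc-lazy -no-process-syms foo.ll
+// in which case foo.ll must itself define (or JIT-link) everything it calls.
+// Without -no-process-syms, lli keeps its prior behavior and makes process
+// symbols (all except main) visible to JIT'd code.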