diff --git a/contrib/llvm-project/FREEBSD-Xlist b/contrib/llvm-project/FREEBSD-Xlist index d72039f1b7de..4b581b177faf 100644 --- a/contrib/llvm-project/FREEBSD-Xlist +++ b/contrib/llvm-project/FREEBSD-Xlist @@ -531,6 +531,7 @@ llvm/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt llvm/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt llvm/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt +llvm/lib/Extensions/ llvm/lib/Frontend/CMakeLists.txt llvm/lib/Frontend/LLVMBuild.txt llvm/lib/Frontend/OpenMP/CMakeLists.txt diff --git a/contrib/llvm-project/clang/lib/Sema/SemaTemplate.cpp b/contrib/llvm-project/clang/lib/Sema/SemaTemplate.cpp index c38c724ed9b0..264c903209af 100644 --- a/contrib/llvm-project/clang/lib/Sema/SemaTemplate.cpp +++ b/contrib/llvm-project/clang/lib/Sema/SemaTemplate.cpp @@ -3817,6 +3817,9 @@ TypeResult Sema::ActOnTagTemplateIdType(TagUseKind TUK, SourceLocation LAngleLoc, ASTTemplateArgsPtr TemplateArgsIn, SourceLocation RAngleLoc) { + if (SS.isInvalid()) + return TypeResult(true); + TemplateName Template = TemplateD.get(); // Translate the parser's template argument list in our AST format. @@ -5925,7 +5928,9 @@ bool UnnamedLocalNoLinkageFinder::VisitDependentNameType( bool UnnamedLocalNoLinkageFinder::VisitDependentTemplateSpecializationType( const DependentTemplateSpecializationType* T) { - return VisitNestedNameSpecifier(T->getQualifier()); + if (auto *Q = T->getQualifier()) + return VisitNestedNameSpecifier(Q); + return false; } bool UnnamedLocalNoLinkageFinder::VisitPackExpansionType( @@ -5979,6 +5984,7 @@ bool UnnamedLocalNoLinkageFinder::VisitTagDecl(const TagDecl *Tag) { bool UnnamedLocalNoLinkageFinder::VisitNestedNameSpecifier( NestedNameSpecifier *NNS) { + assert(NNS); if (NNS->getPrefix() && VisitNestedNameSpecifier(NNS->getPrefix())) return true; diff --git a/contrib/llvm-project/clang/lib/Tooling/Syntax/Tokens.cpp b/contrib/llvm-project/clang/lib/Tooling/Syntax/Tokens.cpp index 3df1c064923a..35a35f904069 100644 --- a/contrib/llvm-project/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/contrib/llvm-project/clang/lib/Tooling/Syntax/Tokens.cpp @@ -335,14 +335,38 @@ class TokenCollector::CollectPPExpansions : public PPCallbacks { SourceRange Range, const MacroArgs *Args) override { if (!Collector) return; - // Only record top-level expansions, not those where: + const auto &SM = Collector->PP.getSourceManager(); + // Only record top-level expansions that directly produce expanded tokens. + // This excludes those where: // - the macro use is inside a macro body, // - the macro appears in an argument to another macro. - if (!MacroNameTok.getLocation().isFileID() || - (LastExpansionEnd.isValid() && - Collector->PP.getSourceManager().isBeforeInTranslationUnit( - Range.getBegin(), LastExpansionEnd))) + // However macro expansion isn't really a tree, it's token rewrite rules, + // so there are other cases, e.g. + // #define B(X) X + // #define A 1 + B + // A(2) + // Both A and B produce expanded tokens, though the macro name 'B' comes + // from an expansion. The best we can do is merge the mappings for both. + + // The *last* token of any top-level macro expansion must be in a file. + // (In the example above, see the closing paren of the expansion of B). + if (!Range.getEnd().isFileID()) return; + // If there's a current expansion that encloses this one, this one can't be + // top-level. + if (LastExpansionEnd.isValid() && + !SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd())) + return; + + // If the macro invocation (B) starts in a macro (A) but ends in a file, + // we'll create a merged mapping for A + B by overwriting the endpoint for + // A's startpoint. + if (!Range.getBegin().isFileID()) { + Range.setBegin(SM.getExpansionLoc(Range.getBegin())); + assert(Collector->Expansions.count(Range.getBegin().getRawEncoding()) && + "Overlapping macros should have same expansion location"); + } + Collector->Expansions[Range.getBegin().getRawEncoding()] = Range.getEnd(); LastExpansionEnd = Range.getEnd(); } @@ -399,197 +423,167 @@ class TokenCollector::Builder { } TokenBuffer build() && { - buildSpelledTokens(); - - // Walk over expanded tokens and spelled tokens in parallel, building the - // mappings between those using source locations. - // To correctly recover empty macro expansions, we also take locations - // reported to PPCallbacks::MacroExpands into account as we do not have any - // expanded tokens with source locations to guide us. - - // The 'eof' token is special, it is not part of spelled token stream. We - // handle it separately at the end. assert(!Result.ExpandedTokens.empty()); assert(Result.ExpandedTokens.back().kind() == tok::eof); - for (unsigned I = 0; I < Result.ExpandedTokens.size() - 1; ++I) { - // (!) I might be updated by the following call. - processExpandedToken(I); + + // Tokenize every file that contributed tokens to the expanded stream. + buildSpelledTokens(); + + // The expanded token stream consists of runs of tokens that came from + // the same source (a macro expansion, part of a file etc). + // Between these runs are the logical positions of spelled tokens that + // didn't expand to anything. + while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) { + // Create empty mappings for spelled tokens that expanded to nothing here. + // May advance NextSpelled, but NextExpanded is unchanged. + discard(); + // Create mapping for a contiguous run of expanded tokens. + // Advances NextExpanded past the run, and NextSpelled accordingly. + unsigned OldPosition = NextExpanded; + advance(); + if (NextExpanded == OldPosition) + diagnoseAdvanceFailure(); } - - // 'eof' not handled in the loop, do it here. - assert(SM.getMainFileID() == - SM.getFileID(Result.ExpandedTokens.back().location())); - fillGapUntil(Result.Files[SM.getMainFileID()], - Result.ExpandedTokens.back().location(), - Result.ExpandedTokens.size() - 1); - Result.Files[SM.getMainFileID()].EndExpanded = Result.ExpandedTokens.size(); - - // Some files might have unaccounted spelled tokens at the end, add an empty - // mapping for those as they did not have expanded counterparts. - fillGapsAtEndOfFiles(); + // If any tokens remain in any of the files, they didn't expand to anything. + // Create empty mappings up until the end of the file. + for (const auto &File : Result.Files) + discard(File.first); return std::move(Result); } private: - /// Process the next token in an expanded stream and move corresponding - /// spelled tokens, record any mapping if needed. - /// (!) \p I will be updated if this had to skip tokens, e.g. for macros. - void processExpandedToken(unsigned &I) { - auto L = Result.ExpandedTokens[I].location(); - if (L.isMacroID()) { - processMacroExpansion(SM.getExpansionRange(L), I); - return; + // Consume a sequence of spelled tokens that didn't expand to anything. + // In the simplest case, skips spelled tokens until finding one that produced + // the NextExpanded token, and creates an empty mapping for them. + // If Drain is provided, skips remaining tokens from that file instead. + void discard(llvm::Optional Drain = llvm::None) { + SourceLocation Target = + Drain ? SM.getLocForEndOfFile(*Drain) + : SM.getExpansionLoc( + Result.ExpandedTokens[NextExpanded].location()); + FileID File = SM.getFileID(Target); + const auto &SpelledTokens = Result.Files[File].SpelledTokens; + auto &NextSpelled = this->NextSpelled[File]; + + TokenBuffer::Mapping Mapping; + Mapping.BeginSpelled = NextSpelled; + // When dropping trailing tokens from a file, the empty mapping should + // be positioned within the file's expanded-token range (at the end). + Mapping.BeginExpanded = Mapping.EndExpanded = + Drain ? Result.Files[*Drain].EndExpanded : NextExpanded; + // We may want to split into several adjacent empty mappings. + // FlushMapping() emits the current mapping and starts a new one. + auto FlushMapping = [&, this] { + Mapping.EndSpelled = NextSpelled; + if (Mapping.BeginSpelled != Mapping.EndSpelled) + Result.Files[File].Mappings.push_back(Mapping); + Mapping.BeginSpelled = NextSpelled; + }; + + while (NextSpelled < SpelledTokens.size() && + SpelledTokens[NextSpelled].location() < Target) { + // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion) + // then we want to partition our (empty) mapping. + // [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target) + SourceLocation KnownEnd = CollectedExpansions.lookup( + SpelledTokens[NextSpelled].location().getRawEncoding()); + if (KnownEnd.isValid()) { + FlushMapping(); // Emits [Start, NextSpelled) + while (NextSpelled < SpelledTokens.size() && + SpelledTokens[NextSpelled].location() <= KnownEnd) + ++NextSpelled; + FlushMapping(); // Emits [NextSpelled, KnownEnd] + // Now the loop contitues and will emit (KnownEnd, Target). + } else { + ++NextSpelled; + } } - if (L.isFileID()) { - auto FID = SM.getFileID(L); - TokenBuffer::MarkedFile &File = Result.Files[FID]; + FlushMapping(); + } - fillGapUntil(File, L, I); + // Consumes the NextExpanded token and others that are part of the same run. + // Increases NextExpanded and NextSpelled by at least one, and adds a mapping + // (unless this is a run of file tokens, which we represent with no mapping). + void advance() { + const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded]; + SourceLocation Expansion = SM.getExpansionLoc(Tok.location()); + FileID File = SM.getFileID(Expansion); + const auto &SpelledTokens = Result.Files[File].SpelledTokens; + auto &NextSpelled = this->NextSpelled[File]; - // Skip the token. - assert(File.SpelledTokens[NextSpelled[FID]].location() == L && - "no corresponding token in the spelled stream"); - ++NextSpelled[FID]; - return; + if (Tok.location().isFileID()) { + // A run of file tokens continues while the expanded/spelled tokens match. + while (NextSpelled < SpelledTokens.size() && + NextExpanded < Result.ExpandedTokens.size() && + SpelledTokens[NextSpelled].location() == + Result.ExpandedTokens[NextExpanded].location()) { + ++NextSpelled; + ++NextExpanded; + } + // We need no mapping for file tokens copied to the expanded stream. + } else { + // We found a new macro expansion. We should have its spelling bounds. + auto End = CollectedExpansions.lookup(Expansion.getRawEncoding()); + assert(End.isValid() && "Macro expansion wasn't captured?"); + + // Mapping starts here... + TokenBuffer::Mapping Mapping; + Mapping.BeginExpanded = NextExpanded; + Mapping.BeginSpelled = NextSpelled; + // ... consumes spelled tokens within bounds we captured ... + while (NextSpelled < SpelledTokens.size() && + SpelledTokens[NextSpelled].location() <= End) + ++NextSpelled; + // ... consumes expanded tokens rooted at the same expansion ... + while (NextExpanded < Result.ExpandedTokens.size() && + SM.getExpansionLoc( + Result.ExpandedTokens[NextExpanded].location()) == Expansion) + ++NextExpanded; + // ... and ends here. + Mapping.EndExpanded = NextExpanded; + Mapping.EndSpelled = NextSpelled; + Result.Files[File].Mappings.push_back(Mapping); } } - /// Skipped expanded and spelled tokens of a macro expansion that covers \p - /// SpelledRange. Add a corresponding mapping. - /// (!) \p I will be the index of the last token in an expansion after this - /// function returns. - void processMacroExpansion(CharSourceRange SpelledRange, unsigned &I) { - auto FID = SM.getFileID(SpelledRange.getBegin()); - assert(FID == SM.getFileID(SpelledRange.getEnd())); - TokenBuffer::MarkedFile &File = Result.Files[FID]; - - fillGapUntil(File, SpelledRange.getBegin(), I); - - // Skip all expanded tokens from the same macro expansion. - unsigned BeginExpanded = I; - for (; I + 1 < Result.ExpandedTokens.size(); ++I) { - auto NextL = Result.ExpandedTokens[I + 1].location(); - if (!NextL.isMacroID() || - SM.getExpansionLoc(NextL) != SpelledRange.getBegin()) - break; + // advance() is supposed to consume at least one token - if not, we crash. + void diagnoseAdvanceFailure() { +#ifndef NDEBUG + // Show the failed-to-map token in context. + for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10; + I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) { + const char *L = + (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : " "; + llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n"; } - unsigned EndExpanded = I + 1; - consumeMapping(File, SM.getFileOffset(SpelledRange.getEnd()), BeginExpanded, - EndExpanded, NextSpelled[FID]); +#endif + llvm_unreachable("Couldn't map expanded token to spelled tokens!"); } /// Initializes TokenBuffer::Files and fills spelled tokens and expanded /// ranges for each of the files. void buildSpelledTokens() { for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) { - auto FID = - SM.getFileID(SM.getExpansionLoc(Result.ExpandedTokens[I].location())); + const auto &Tok = Result.ExpandedTokens[I]; + auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location())); auto It = Result.Files.try_emplace(FID); TokenBuffer::MarkedFile &File = It.first->second; - File.EndExpanded = I + 1; + // The eof token should not be considered part of the main-file's range. + File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1; + if (!It.second) continue; // we have seen this file before. - // This is the first time we see this file. File.BeginExpanded = I; File.SpelledTokens = tokenize(FID, SM, LangOpts); } } - void consumeEmptyMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset, - unsigned ExpandedIndex, unsigned &SpelledIndex) { - consumeMapping(File, EndOffset, ExpandedIndex, ExpandedIndex, SpelledIndex); - } - - /// Consumes spelled tokens that form a macro expansion and adds a entry to - /// the resulting token buffer. - /// (!) SpelledIndex is updated in-place. - void consumeMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset, - unsigned BeginExpanded, unsigned EndExpanded, - unsigned &SpelledIndex) { - // We need to record this mapping before continuing. - unsigned MappingBegin = SpelledIndex; - ++SpelledIndex; - - bool HitMapping = - tryConsumeSpelledUntil(File, EndOffset + 1, SpelledIndex).hasValue(); - (void)HitMapping; - assert(!HitMapping && "recursive macro expansion?"); - - TokenBuffer::Mapping M; - M.BeginExpanded = BeginExpanded; - M.EndExpanded = EndExpanded; - M.BeginSpelled = MappingBegin; - M.EndSpelled = SpelledIndex; - - File.Mappings.push_back(M); - } - - /// Consumes spelled tokens until location \p L is reached and adds a mapping - /// covering the consumed tokens. The mapping will point to an empty expanded - /// range at position \p ExpandedIndex. - void fillGapUntil(TokenBuffer::MarkedFile &File, SourceLocation L, - unsigned ExpandedIndex) { - assert(L.isFileID()); - FileID FID; - unsigned Offset; - std::tie(FID, Offset) = SM.getDecomposedLoc(L); - - unsigned &SpelledIndex = NextSpelled[FID]; - unsigned MappingBegin = SpelledIndex; - while (true) { - auto EndLoc = tryConsumeSpelledUntil(File, Offset, SpelledIndex); - if (SpelledIndex != MappingBegin) { - TokenBuffer::Mapping M; - M.BeginSpelled = MappingBegin; - M.EndSpelled = SpelledIndex; - M.BeginExpanded = M.EndExpanded = ExpandedIndex; - File.Mappings.push_back(M); - } - if (!EndLoc) - break; - consumeEmptyMapping(File, SM.getFileOffset(*EndLoc), ExpandedIndex, - SpelledIndex); - - MappingBegin = SpelledIndex; - } - }; - - /// Consumes spelled tokens until it reaches Offset or a mapping boundary, - /// i.e. a name of a macro expansion or the start '#' token of a PP directive. - /// (!) NextSpelled is updated in place. - /// - /// returns None if \p Offset was reached, otherwise returns the end location - /// of a mapping that starts at \p NextSpelled. - llvm::Optional - tryConsumeSpelledUntil(TokenBuffer::MarkedFile &File, unsigned Offset, - unsigned &NextSpelled) { - for (; NextSpelled < File.SpelledTokens.size(); ++NextSpelled) { - auto L = File.SpelledTokens[NextSpelled].location(); - if (Offset <= SM.getFileOffset(L)) - return llvm::None; // reached the offset we are looking for. - auto Mapping = CollectedExpansions.find(L.getRawEncoding()); - if (Mapping != CollectedExpansions.end()) - return Mapping->second; // found a mapping before the offset. - } - return llvm::None; // no more tokens, we "reached" the offset. - } - - /// Adds empty mappings for unconsumed spelled tokens at the end of each file. - void fillGapsAtEndOfFiles() { - for (auto &F : Result.Files) { - if (F.second.SpelledTokens.empty()) - continue; - fillGapUntil(F.second, F.second.SpelledTokens.back().endLocation(), - F.second.EndExpanded); - } - } - TokenBuffer Result; - /// For each file, a position of the next spelled token we will consume. - llvm::DenseMap NextSpelled; + unsigned NextExpanded = 0; // cursor in ExpandedTokens + llvm::DenseMap NextSpelled; // cursor in SpelledTokens PPExpansions CollectedExpansions; const SourceManager &SM; const LangOptions &LangOpts; diff --git a/contrib/llvm-project/clang/utils/TableGen/ClangAttrEmitter.cpp b/contrib/llvm-project/clang/utils/TableGen/ClangAttrEmitter.cpp index 4c3742c8e339..2fce9d428137 100644 --- a/contrib/llvm-project/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/contrib/llvm-project/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -2825,6 +2825,7 @@ void EmitClangAttrPCHRead(RecordKeeper &Records, raw_ostream &OS) { if (R.isSubClassOf(InhClass)) OS << " bool isInherited = Record.readInt();\n"; OS << " bool isImplicit = Record.readInt();\n"; + OS << " bool isPackExpansion = Record.readInt();\n"; ArgRecords = R.getValueAsListOfDefs("Args"); Args.clear(); for (const auto *Arg : ArgRecords) { @@ -2840,6 +2841,7 @@ void EmitClangAttrPCHRead(RecordKeeper &Records, raw_ostream &OS) { if (R.isSubClassOf(InhClass)) OS << " cast(New)->setInherited(isInherited);\n"; OS << " New->setImplicit(isImplicit);\n"; + OS << " New->setPackExpansion(isPackExpansion);\n"; OS << " break;\n"; OS << " }\n"; } @@ -2866,6 +2868,7 @@ void EmitClangAttrPCHWrite(RecordKeeper &Records, raw_ostream &OS) { if (R.isSubClassOf(InhClass)) OS << " Record.push_back(SA->isInherited());\n"; OS << " Record.push_back(A->isImplicit());\n"; + OS << " Record.push_back(A->isPackExpansion());\n"; for (const auto *Arg : Args) createArgument(*Arg, R.getName())->writePCHWrite(OS); diff --git a/contrib/llvm-project/lld/COFF/Chunks.h b/contrib/llvm-project/lld/COFF/Chunks.h index 7ae4ee735f4a..2be2a72c4a1e 100644 --- a/contrib/llvm-project/lld/COFF/Chunks.h +++ b/contrib/llvm-project/lld/COFF/Chunks.h @@ -486,7 +486,9 @@ class ImportThunkChunkX86 : public ImportThunkChunk { class ImportThunkChunkARM : public ImportThunkChunk { public: - explicit ImportThunkChunkARM(Defined *s) : ImportThunkChunk(s) {} + explicit ImportThunkChunkARM(Defined *s) : ImportThunkChunk(s) { + setAlignment(2); + } size_t getSize() const override { return sizeof(importThunkARM); } void getBaserels(std::vector *res) override; void writeTo(uint8_t *buf) const override; @@ -494,14 +496,16 @@ class ImportThunkChunkARM : public ImportThunkChunk { class ImportThunkChunkARM64 : public ImportThunkChunk { public: - explicit ImportThunkChunkARM64(Defined *s) : ImportThunkChunk(s) {} + explicit ImportThunkChunkARM64(Defined *s) : ImportThunkChunk(s) { + setAlignment(4); + } size_t getSize() const override { return sizeof(importThunkARM64); } void writeTo(uint8_t *buf) const override; }; class RangeExtensionThunkARM : public NonSectionChunk { public: - explicit RangeExtensionThunkARM(Defined *t) : target(t) {} + explicit RangeExtensionThunkARM(Defined *t) : target(t) { setAlignment(2); } size_t getSize() const override; void writeTo(uint8_t *buf) const override; diff --git a/contrib/llvm-project/lld/COFF/DLL.cpp b/contrib/llvm-project/lld/COFF/DLL.cpp index 39d9fbab63d5..50301ad91b1d 100644 --- a/contrib/llvm-project/lld/COFF/DLL.cpp +++ b/contrib/llvm-project/lld/COFF/DLL.cpp @@ -365,7 +365,9 @@ class TailMergeChunkX86 : public NonSectionChunk { class ThunkChunkARM : public NonSectionChunk { public: - ThunkChunkARM(Defined *i, Chunk *tm) : imp(i), tailMerge(tm) {} + ThunkChunkARM(Defined *i, Chunk *tm) : imp(i), tailMerge(tm) { + setAlignment(2); + } size_t getSize() const override { return sizeof(thunkARM); } @@ -385,7 +387,9 @@ class ThunkChunkARM : public NonSectionChunk { class TailMergeChunkARM : public NonSectionChunk { public: - TailMergeChunkARM(Chunk *d, Defined *h) : desc(d), helper(h) {} + TailMergeChunkARM(Chunk *d, Defined *h) : desc(d), helper(h) { + setAlignment(2); + } size_t getSize() const override { return sizeof(tailMergeARM); } @@ -405,7 +409,9 @@ class TailMergeChunkARM : public NonSectionChunk { class ThunkChunkARM64 : public NonSectionChunk { public: - ThunkChunkARM64(Defined *i, Chunk *tm) : imp(i), tailMerge(tm) {} + ThunkChunkARM64(Defined *i, Chunk *tm) : imp(i), tailMerge(tm) { + setAlignment(4); + } size_t getSize() const override { return sizeof(thunkARM64); } @@ -422,7 +428,9 @@ class ThunkChunkARM64 : public NonSectionChunk { class TailMergeChunkARM64 : public NonSectionChunk { public: - TailMergeChunkARM64(Chunk *d, Defined *h) : desc(d), helper(h) {} + TailMergeChunkARM64(Chunk *d, Defined *h) : desc(d), helper(h) { + setAlignment(4); + } size_t getSize() const override { return sizeof(tailMergeARM64); } diff --git a/contrib/llvm-project/lld/ELF/ScriptLexer.cpp b/contrib/llvm-project/lld/ELF/ScriptLexer.cpp index e0ff56fec3f3..1fed3d06227e 100644 --- a/contrib/llvm-project/lld/ELF/ScriptLexer.cpp +++ b/contrib/llvm-project/lld/ELF/ScriptLexer.cpp @@ -52,6 +52,8 @@ StringRef ScriptLexer::getLine() { // Returns 1-based line number of the current token. size_t ScriptLexer::getLineNumber() { + if (pos == 0) + return 1; StringRef s = getCurrentMB().getBuffer(); StringRef tok = tokens[pos - 1]; return s.substr(0, tok.data() - s.data()).count('\n') + 1; @@ -292,7 +294,9 @@ static bool encloses(StringRef s, StringRef t) { MemoryBufferRef ScriptLexer::getCurrentMB() { // Find input buffer containing the current token. - assert(!mbs.empty() && pos > 0); + assert(!mbs.empty()); + if (pos == 0) + return mbs.back(); for (MemoryBufferRef mb : mbs) if (encloses(mb.getBuffer(), tokens[pos - 1])) return mb; diff --git a/contrib/llvm-project/lld/ELF/ScriptParser.cpp b/contrib/llvm-project/lld/ELF/ScriptParser.cpp index fd8de3b54bd7..80ec8b655b04 100644 --- a/contrib/llvm-project/lld/ELF/ScriptParser.cpp +++ b/contrib/llvm-project/lld/ELF/ScriptParser.cpp @@ -737,6 +737,7 @@ bool ScriptParser::readSectionDirective(OutputSection *cmd, StringRef tok1, Stri expect("("); if (consume("NOLOAD")) { cmd->noload = true; + cmd->type = SHT_NOBITS; } else { skip(); // This is "COPY", "INFO" or "OVERLAY". cmd->nonAlloc = true; diff --git a/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.def b/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.def index 6b25ef2ca435..050059e36a25 100644 --- a/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/contrib/llvm-project/llvm/include/llvm/Support/AArch64TargetParser.def @@ -152,6 +152,10 @@ AARCH64_CPU_NAME("kryo", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_CRC)) AARCH64_CPU_NAME("thunderx2t99", ARMV8_1A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_NONE)) +AARCH64_CPU_NAME("thunderx3t110", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_CRC | AEK_CRYPTO | AEK_FP | AEK_SIMD | + AEK_LSE | AEK_RAND | AArch64::AEK_PROFILE | + AArch64::AEK_RAS)) AARCH64_CPU_NAME("thunderx", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_CRC | AArch64::AEK_PROFILE)) AARCH64_CPU_NAME("thunderxt88", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp index 4b9c50aeb1d3..35964b2cdbda 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp @@ -963,10 +963,10 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, continue; } - // If one of the blocks is the entire common tail (and not the entry - // block, which we can't jump to), we can treat all blocks with this same - // tail at once. Use PredBB if that is one of the possibilities, as that - // will not introduce any extra branches. + // If one of the blocks is the entire common tail (and is not the entry + // block/an EH pad, which we can't jump to), we can treat all blocks with + // this same tail at once. Use PredBB if that is one of the possibilities, + // as that will not introduce any extra branches. MachineBasicBlock *EntryBB = &MergePotentials.front().getBlock()->getParent()->front(); unsigned commonTailIndex = SameTails.size(); @@ -974,19 +974,21 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // into the other. if (SameTails.size() == 2 && SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) && - SameTails[1].tailIsWholeBlock()) + SameTails[1].tailIsWholeBlock() && !SameTails[1].getBlock()->isEHPad()) commonTailIndex = 1; else if (SameTails.size() == 2 && SameTails[1].getBlock()->isLayoutSuccessor( - SameTails[0].getBlock()) && - SameTails[0].tailIsWholeBlock()) + SameTails[0].getBlock()) && + SameTails[0].tailIsWholeBlock() && + !SameTails[0].getBlock()->isEHPad()) commonTailIndex = 0; else { // Otherwise just pick one, favoring the fall-through predecessor if // there is one. for (unsigned i = 0, e = SameTails.size(); i != e; ++i) { MachineBasicBlock *MBB = SameTails[i].getBlock(); - if (MBB == EntryBB && SameTails[i].tailIsWholeBlock()) + if ((MBB == EntryBB || MBB->isEHPad()) && + SameTails[i].tailIsWholeBlock()) continue; if (MBB == PredBB) { commonTailIndex = i; diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td index 0106355b1a44..6e57543c4c0f 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td @@ -443,6 +443,10 @@ def SVEUnsupported : AArch64Unsupported { HasSVE2BitPerm]; } +def PAUnsupported : AArch64Unsupported { + let F = [HasPA]; +} + include "AArch64SchedA53.td" include "AArch64SchedA57.td" include "AArch64SchedCyclone.td" @@ -453,6 +457,7 @@ include "AArch64SchedExynosM4.td" include "AArch64SchedExynosM5.td" include "AArch64SchedThunderX.td" include "AArch64SchedThunderX2T99.td" +include "AArch64SchedThunderX3T110.td" def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", "Cortex-A35 ARM processors", [ @@ -780,6 +785,25 @@ def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", FeatureLSE, HasV8_1aOps]>; +def ProcThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily", + "ThunderX3T110", + "Marvell ThunderX3 processors", [ + FeatureAggressiveFMA, + FeatureCRC, + FeatureCrypto, + FeatureFPARMv8, + FeatureArithmeticBccFusion, + FeatureNEON, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureLSE, + FeaturePA, + FeatureUseAA, + FeatureBalanceFPOps, + FeaturePerfMon, + FeatureStrictAlign, + HasV8_3aOps]>; + def ProcThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX", "Cavium ThunderX processors", [ FeatureCRC, @@ -878,6 +902,8 @@ def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>; def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>; // Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan. def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>; +// Marvell ThunderX3T110 Processors. +def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, [ProcThunderX3T110]>; // FIXME: HiSilicon TSV110 is currently modeled as a Cortex-A57. def : ProcessorModel<"tsv110", CortexA57Model, [ProcTSV110]>; diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp index 6fa3a462bc71..1956014b738d 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp @@ -118,9 +118,15 @@ void AArch64BranchTargets::addBTI(MachineBasicBlock &MBB, bool CouldCall, auto MBBI = MBB.begin(); - // PACI[AB]SP are implicitly BTI JC, so no BTI instruction needed there. - if (MBBI != MBB.end() && (MBBI->getOpcode() == AArch64::PACIASP || - MBBI->getOpcode() == AArch64::PACIBSP)) + // Skip the meta instuctions, those will be removed anyway. + for (; MBBI != MBB.end() && MBBI->isMetaInstruction(); ++MBBI) + ; + + // SCTLR_EL1.BT[01] is set to 0 by default which means + // PACI[AB]SP are implicitly BTI C so no BTI C instruction is needed there. + if (MBBI != MBB.end() && HintNum == 34 && + (MBBI->getOpcode() == AArch64::PACIASP || + MBBI->getOpcode() == AArch64::PACIBSP)) return; BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td index a6df0f3f083c..c5ff1fcb274b 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td @@ -26,7 +26,8 @@ def CortexA53Model : SchedMachineModel { // v 1.0 Spreadsheet let CompleteModel = 1; - list UnsupportedFeatures = SVEUnsupported.F; + list UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td index 9f566d1c7079..a760c4319005 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td @@ -31,7 +31,8 @@ def CortexA57Model : SchedMachineModel { let LoopMicroOpBufferSize = 16; let CompleteModel = 1; - list UnsupportedFeatures = SVEUnsupported.F; + list UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td index 798ecb7508c0..5ddfe9e0e34c 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td @@ -18,7 +18,8 @@ def CycloneModel : SchedMachineModel { let MispredictPenalty = 16; // 14-19 cycles are typical. let CompleteModel = 1; - list UnsupportedFeatures = SVEUnsupported.F; + list UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td index d1734c455b2b..3272640cb3f2 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td @@ -24,7 +24,8 @@ def ExynosM3Model : SchedMachineModel { let MispredictPenalty = 16; // Minimum branch misprediction penalty. let CompleteModel = 1; // Use the default model otherwise. - list UnsupportedFeatures = SVEUnsupported.F; + list UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td index d2284f9fa0b5..cac86491f56d 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td @@ -24,7 +24,8 @@ def ExynosM4Model : SchedMachineModel { let MispredictPenalty = 16; // Minimum branch misprediction penalty. let CompleteModel = 1; // Use the default model otherwise. - list UnsupportedFeatures = SVEUnsupported.F; + list UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td index df7402591e7b..86477b81b8bf 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td @@ -24,7 +24,8 @@ def ExynosM5Model : SchedMachineModel { let MispredictPenalty = 15; // Minimum branch misprediction penalty. let CompleteModel = 1; // Use the default model otherwise. - list UnsupportedFeatures = SVEUnsupported.F; + list UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkor.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkor.td index 92d03963de57..a17ab36d7f9e 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkor.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkor.td @@ -23,8 +23,8 @@ def FalkorModel : SchedMachineModel { let MispredictPenalty = 11; // Minimum branch misprediction penalty. let CompleteModel = 1; - list UnsupportedFeatures = SVEUnsupported.F; - + list UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedKryo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedKryo.td index 0e1a24103121..ba14bf1f50de 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedKryo.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedKryo.td @@ -27,8 +27,8 @@ def KryoModel : SchedMachineModel { let LoopMicroOpBufferSize = 16; let CompleteModel = 1; - list UnsupportedFeatures = SVEUnsupported.F; - + list UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX.td index 3b6aecf5c035..9c50f9708583 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX.td @@ -25,8 +25,8 @@ def ThunderXT8XModel : SchedMachineModel { let PostRAScheduler = 1; // Use PostRA scheduler. let CompleteModel = 1; - list UnsupportedFeatures = SVEUnsupported.F; - + list UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td index e2a293c06877..233613f7be3a 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td @@ -25,8 +25,8 @@ def ThunderX2T99Model : SchedMachineModel { let PostRAScheduler = 1; // Using PostRA sched. let CompleteModel = 1; - list UnsupportedFeatures = SVEUnsupported.F; - + list UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td new file mode 100644 index 000000000000..00838cc4b9bd --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td @@ -0,0 +1,1997 @@ +//=- AArch64SchedThunderX3T110.td - Marvell ThunderX3 T110 ---*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the scheduling model for Marvell ThunderX3T110 +// family of processors. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Pipeline Description. + +def ThunderX3T110Model : SchedMachineModel { + let IssueWidth = 4; // 4 micro-ops dispatched at a time. + let MicroOpBufferSize = 70; // 70 entries in micro-op re-order buffer. + let LoadLatency = 4; // Optimistic load latency. + let MispredictPenalty = 12; // Extra cycles for mispredicted branch. + // Determined via a mix of micro-arch details and experimentation. + let LoopMicroOpBufferSize = 128; // FIXME: might be much bigger in TX3. + let PostRAScheduler = 1; // Using PostRA sched. + let CompleteModel = 1; + + list UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); + // FIXME: Remove when all errors have been fixed. + let FullInstRWOverlapCheck = 0; +} + +let SchedModel = ThunderX3T110Model in { + +// Issue ports. + +// Port 0: ALU. +def THX3T110P0 : ProcResource<1>; + +// Port 1: ALU. +def THX3T110P1 : ProcResource<1>; + +// Port 2: ALU/Branch. +def THX3T110P2 : ProcResource<1>; + +// Port 3: ALU/Branch. +def THX3T110P3 : ProcResource<1>; + +// Port 4: Load/Store. +def THX3T110P4 : ProcResource<1>; + +// Port 5: Load/store. +def THX3T110P5 : ProcResource<1>; + +// Port 6: FP/Neon/SIMD/Crypto. +def THX3T110P6FP0 : ProcResource<1>; + +// Port 7: FP/Neon/SIMD/Crypto. +def THX3T110P7FP1 : ProcResource<1>; + +// Port 8: FP/Neon/SIMD/Crypto. +def THX3T110P8FP2 : ProcResource<1>; + +// Port 9: FP/Neon/SIMD/Crypto. +def THX3T110P9FP3 : ProcResource<1>; + +// Port 10: Store Data Unit. +def THX3T110SD0 : ProcResource<1>; + +// Define groups for the functional units on each issue port. Each group +// created will be used by a WriteRes. + +// Integer divide/mulhi micro-ops only on port I1. +def THX3T110I1 : ProcResGroup<[THX3T110P1]>; + +// Branch micro-ops on ports I2/I3. +def THX3T110I23 : ProcResGroup<[THX3T110P2, THX3T110P3]>; + +// Branch micro-ops on ports I1/I2/I3. +def THX3T110I123 : ProcResGroup<[THX3T110P1, THX3T110P2, THX3T110P3]>; + +// Integer micro-ops on ports I0/I1/I2. +def THX3T110I012 : ProcResGroup<[THX3T110P0, THX3T110P1, THX3T110P2]>; + +// Integer micro-ops on ports I0/I1/I2/I3. +def THX3T110I0123 : ProcResGroup<[THX3T110P0, THX3T110P1, + THX3T110P2, THX3T110P3]>; + +// FP micro-ops on ports FP0/FP1/FP2/FP3. +def THX3T110FP0123 : ProcResGroup<[THX3T110P6FP0, THX3T110P7FP1, + THX3T110P8FP2, THX3T110P9FP3]>; + +// FP micro-ops on ports FP2/FP3. +def THX3T110FP23 : ProcResGroup<[THX3T110P8FP2, THX3T110P9FP3]>; + +// ASIMD micro-ops on ports FP0/FP1/FP2/FP3. +def THX3T110SIMD : ProcResGroup<[THX3T110P6FP0, THX3T110P7FP1, + THX3T110P8FP2, THX3T110P9FP3]>; + +// Store data micro-ops only on port 10. +def THX3T110SD : ProcResGroup<[THX3T110SD0]>; + +// Load/store micro-ops on ports P4/P5. +def THX3T110LS : ProcResGroup<[THX3T110P4, THX3T110P5]>; + +// 70 entry unified scheduler. +def THX3T110ANY: ProcResGroup<[THX3T110P0, THX3T110P1, THX3T110P2, + THX3T110P3, THX3T110P4, THX3T110P5, + THX3T110P6FP0, THX3T110P7FP1, + THX3T110P8FP2, THX3T110P9FP3]> { + let BufferSize = 70; +} + +// Define commonly used write types for InstRW specializations. +// All definitions follow the format: THX3T110Write_Cyc_. + +// 3 cycles on I1. +def THX3T110Write_3Cyc_I1 : SchedWriteRes<[THX3T110I1]> { + let Latency = 3; + let NumMicroOps = 2; +} + +// 4 cycles on I1. +def THX3T110Write_4Cyc_I1 : SchedWriteRes<[THX3T110I1]> { + let Latency = 4; + let NumMicroOps = 2; +} + +// 5 cycles on I1. +def THX3T110Write_5Cyc_I1 : SchedWriteRes<[THX3T110I1]> { + let Latency = 5; + let NumMicroOps = 2; +} + +// 7 cycles on I1. +def THX3T110Write_7Cyc_I1 : SchedWriteRes<[THX3T110I1]> { + let Latency = 7; + let NumMicroOps = 3; +} + +// 23 cycles on I1. +def THX3T110Write_23Cyc_I1 : SchedWriteRes<[THX3T110I1]> { + let Latency = 23; + let ResourceCycles = [13, 23]; + let NumMicroOps = 4; +} + +// 39 cycles on I1. +def THX3T110Write_39Cyc_I1 : SchedWriteRes<[THX3T110I1]> { + let Latency = 39; + let ResourceCycles = [13, 39]; + let NumMicroOps = 4; +} + +// 1 cycle on I2/I3 +def THX3T110Write_1Cyc_I23 : SchedWriteRes<[THX3T110I23]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 8 cycles on I2/I3 +def THX3T110Write_8Cyc_I23 : SchedWriteRes<[THX3T110I23]> { + let Latency = 8; + let NumMicroOps = 3; +} + +// 1 cycle on I1/I2/I3 +def THX3T110Write_1Cyc_I123 : SchedWriteRes<[THX3T110I123]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 8 cycles on I1/I2/I3 +def THX3T110Write_8Cyc_I123 : SchedWriteRes<[THX3T110I123]> { + let Latency = 8; + let NumMicroOps = 3; +} + +// 1 cycle on I0/I1/I2/I3. +def THX3T110Write_1Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 2 cycles on I0/I1/I2/I3. +def THX3T110Write_2Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 2; + let NumMicroOps = 2; +} + +// 3 cycles on I0/I1/I2/I3. +def THX3T110Write_3Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 3; + let NumMicroOps = 2; +} + +// 4 cycles on I0/I1/I2/I3. +def THX3T110Write_4Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 4; + let NumMicroOps = 3; +} + +// 5 cycles on I0/I1/I2/I3. +def THX3T110Write_5Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 6 cycles on I0/I1/I2/I3. +def THX3T110Write_6Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 8 cycles on I0/I1/I2/I3. +def THX3T110Write_8Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 8; + let NumMicroOps = 4; +} + +// 13 cycles on I0/I1/I2/I3. +def THX3T110Write_13Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 13; + let NumMicroOps = 3; +} + +// 23 cycles on I0/I1/I2/I3. +def THX3T110Write_23Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 23; + let NumMicroOps = 3; +} + +// 39 cycles on I0/I1/I2/I3. +def THX3T110Write_39Cyc_I0123 : SchedWriteRes<[THX3T110I0123]> { + let Latency = 39; + let NumMicroOps = 3; +} + +// 4 cycles on F2/F3. +def THX3T110Write_4Cyc_F23 : SchedWriteRes<[THX3T110FP23]> { + let Latency = 4; + let NumMicroOps = 2; +} + +// 5 cycles on F0/F1/F2/F3. +def THX3T110Write_5Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 5; + let NumMicroOps = 2; +} + +// 6 cycles on F0/F1/F2/F3. +def THX3T110Write_6Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 7 cycles on F0/F1/F2/F3. +def THX3T110Write_7Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 7; + let NumMicroOps = 3; +} + +// 8 cycles on F0/F1/F2/F3. +def THX3T110Write_8Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 8; + let NumMicroOps = 3; +} + +// 10 cycles on F0/F1/F2/F3. +def THX3T110Write_10Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 10; + let NumMicroOps = 3; +} + +// 16 cycles on F0/F1/F2/F3. +def THX3T110Write_16Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 16; + let NumMicroOps = 3; + let ResourceCycles = [8]; +} + +// 23 cycles on F0/F1/F2/F3. +def THX3T110Write_23Cyc_F01 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 23; + let NumMicroOps = 3; + let ResourceCycles = [11]; +} + +// 1 cycle on LS0/LS1. +def THX3T110Write_1Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { + let Latency = 1; + let NumMicroOps = 1; +} + +// 2 cycles on LS0/LS1. +def THX3T110Write_2Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { + let Latency = 2; + let NumMicroOps = 2; +} + +// 4 cycles on LS0/LS1. +def THX3T110Write_4Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} + +// 5 cycles on LS0/LS1. +def THX3T110Write_5Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 6 cycles on LS0/LS1. +def THX3T110Write_6Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 4 + 5 cycles on LS0/LS1. +// First resource is available after 4 cycles. +// Second resource is available after 5 cycles. +// Load vector pair, immed offset, Q-form [LDP/LDNP]. +def THX3T110Write_4_5Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [4, 5]; +} + +// 4 + 8 cycles on LS0/LS1. +// First resource is available after 4 cycles. +// Second resource is available after 8 cycles. +// Load vector pair, immed offset, S/D-form [LDP/LDNP]. +def THX3T110Write_4_8Cyc_LS01 : SchedWriteRes<[THX3T110LS]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [4, 8]; +} + +// 11 cycles on LS0/LS1 and I1. +def THX3T110Write_11Cyc_LS01_I1 : + SchedWriteRes<[THX3T110LS, THX3T110I1]> { + let Latency = 11; + let NumMicroOps = 4; +} + +// 1 cycles on LS0/LS1 and I0/I1/I2/I3. +def THX3T110Write_1Cyc_LS01_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 1 cycles on LS0/LS1 and 2 of I0/I1/I2/I3. +def THX3T110Write_1Cyc_LS01_I0123_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123, THX3T110I0123]> { + let Latency = 1; + let NumMicroOps = 3; +} + +// 4 cycles on LS0/LS1 and I0/I1/I2/I3. +def THX3T110Write_4Cyc_LS01_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123]> { + let Latency = 4; + let NumMicroOps = 3; +} + +// 4 cycles on LS0/LS1 and 2 of I0/I1/I2/I3. +def THX3T110Write_4Cyc_LS01_I0123_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123, THX3T110I0123]> { + let Latency = 4; + let NumMicroOps = 3; +} + +// 5 cycles on LS0/LS1 and I0/I1/I2/I3. +def THX3T110Write_5Cyc_LS01_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 5 cycles on LS0/LS1 and 2 of I0/I1/I2/I3. +def THX3T110Write_5Cyc_LS01_I0123_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123, THX3T110I0123]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 6 cycles on LS0/LS1 and I0/I1/I2/I3. +def THX3T110Write_6Cyc_LS01_I012 : + SchedWriteRes<[THX3T110LS, THX3T110I0123]> { + let Latency = 6; + let NumMicroOps = 4; +} + +// 6 cycles on LS0/LS1 and 2 of I0/I1/I2/I3. +def THX3T110Write_6Cyc_LS01_I0123_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123, THX3T110I0123]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 1 cycle on LS0/LS1 and SD. +def THX3T110Write_1Cyc_LS01_SD : + SchedWriteRes<[THX3T110LS, THX3T110SD]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 2 cycles on LS0/LS1 and SD. +def THX3T110Write_2Cyc_LS01_SD : + SchedWriteRes<[THX3T110LS, THX3T110SD]> { + let Latency = 2; + let NumMicroOps = 2; +} + +// 4 cycles on LS0/LS1 and SD. +def THX3T110Write_4Cyc_LS01_SD : + SchedWriteRes<[THX3T110LS, THX3T110SD]> { + let Latency = 4; + let NumMicroOps = 3; +} + +// 5 cycles on LS0/LS1 and SD. +def THX3T110Write_5Cyc_LS01_SD : + SchedWriteRes<[THX3T110LS, THX3T110SD]> { + let Latency = 5; + let NumMicroOps = 4; +} + +// 6 cycles on LS0/LS1 and SD. +def THX3T110Write_6Cyc_LS01_SD : + SchedWriteRes<[THX3T110LS, THX3T110SD]> { + let Latency = 6; + let NumMicroOps = 5; +} + +// 1 cycle on LS0/LS1, SD and I0/I1/I2/I3. +def THX3T110Write_1Cyc_LS01_SD_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 2 cycles on LS0/LS1, SD and I0/I1/I2/I3. +def THX3T110Write_2Cyc_LS01_SD_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> { + let Latency = 2; + let NumMicroOps = 2; +} + +// 4 cycles on LS0/LS1, SD and I0/I1/I2/I3. +def THX3T110Write_4Cyc_LS01_SD_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> { + let Latency = 4; + let NumMicroOps = 3; +} + +// 5 cycles on LS0/LS1, SD and I0/I1/I2/I3. +def THX3T110Write_5Cyc_LS01_SD_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> { + let Latency = 5; + let NumMicroOps = 4; +} + +// 6 cycles on LS0/LS1, SD and I0/I1/I2/I3. +def THX3T110Write_6Cyc_LS01_SD_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110SD, THX3T110I0123]> { + let Latency = 6; + let NumMicroOps = 5; +} + +// 1 cycles on LS0/LS1 and F0/F1/F2/F3. +def THX3T110Write_1Cyc_LS01_F0123 : + SchedWriteRes<[THX3T110LS, THX3T110FP0123]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 5 cycles on LS0/LS1 and F0/F1/F2/F3. +def THX3T110Write_5Cyc_LS01_F0123 : + SchedWriteRes<[THX3T110LS, THX3T110FP0123]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 6 cycles on LS0/LS1 and F0/F1/F2/F3. +def THX3T110Write_6Cyc_LS01_F0123 : + SchedWriteRes<[THX3T110LS, THX3T110FP0123]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 7 cycles on LS0/LS1 and F0/F1/F2/F3. +def THX3T110Write_7Cyc_LS01_F0123 : + SchedWriteRes<[THX3T110LS, THX3T110FP0123]> { + let Latency = 7; + let NumMicroOps = 3; +} + +// 8 cycles on LS0/LS1 and F0/F1/F2/F3. +def THX3T110Write_8Cyc_LS01_F0123 : + SchedWriteRes<[THX3T110LS, THX3T110FP0123]> { + let Latency = 8; + let NumMicroOps = 3; +} + +// 8 cycles on LS0/LS1 and I0/I1/I2/I3. +def THX3T110Write_8Cyc_LS01_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123]> { + let Latency = 8; + let NumMicroOps = 3; +} + +// 12 cycles on LS0/LS1 and I0/I1/I2/I3. +def THX3T110Write_12Cyc_LS01_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123]> { + let Latency = 12; + let NumMicroOps = 4; +} + +// 16 cycles on LS0/LS1 and I0/I1/I2/I3. +def THX3T110Write_16Cyc_LS01_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123]> { + let Latency = 16; + let NumMicroOps = 5; +} + +// 24 cycles on LS0/LS1 and I0/I1/I2/I3. +def THX3T110Write_24Cyc_LS01_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123]> { + let Latency = 24; + let NumMicroOps = 10; +} + +// 32 cycles on LS0/LS1 and I0/I1/I2/I3. +def THX3T110Write_32Cyc_LS01_I0123 : + SchedWriteRes<[THX3T110LS, THX3T110I0123]> { + let Latency = 32; + let NumMicroOps = 14; +} + +// 3 cycles on F0/F1/F2/F3. +def THX3T110Write_3Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 3; + let NumMicroOps = 2; +} + +// 4 cycles on F0/F1/F2/F3. +def THX3T110Write_4Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 4; + let NumMicroOps = 2; +} + +// 5 cycles on F0/F1/F2/F3. +def THX3T110Write_5Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 5; + let NumMicroOps = 2; +} + +// 10 cycles on F0/F1/F2/F3. +def THX3T110Write_10Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 10; + let NumMicroOps = 4; +} + +// 15 cycles on F0/F1/F2/F3. +def THX3T110Write_15Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 15; + let NumMicroOps = 7; +} + +// 16 cycles on F0/F1/F2/F3. +def THX3T110Write_16Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 16; + let NumMicroOps = 3; +} + +// 18 cycles on F0/F1/F2/F3. +def THX3T110Write_18Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 18; + let NumMicroOps = 3; +} + +// 19 cycles on F0/F1/F2/F3. +def THX3T110Write_19Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 19; + let NumMicroOps = 4; +} + +// 20 cycles on F0/F1/F2/F3. +def THX3T110Write_20Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 20; + let NumMicroOps = 4; +} + +// 23 cycles on F0/F1/F2/F3. +def THX3T110Write_23Cyc_F0123 : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 23; + let NumMicroOps = 4; +} + +// 3 cycles on F2/F3 and 4 cycles on F0/F1/F2/F3. +def THX3T110Write_3_4Cyc_F23_F0123 : + SchedWriteRes<[THX3T110FP23, THX3T110FP0123]> { + let Latency = 3; + let NumMicroOps = 2; + let ResourceCycles = [3, 4]; +} + + +// Define commonly used read types. + +// No forwarding is provided for these types. +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +//===----------------------------------------------------------------------===// +// 3. Instruction Tables. + +//--- +// 3.1 Branch Instructions +//--- + +// Branch, immed +// Branch and link, immed +// Compare and branch +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +// Branch, register +// Branch and link, register != LR +// Branch and link, register = LR +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } + +def : WriteRes { + let Latency = 4; + let NumMicroOps = 2; +} + +//--- +// Branch +//--- +def : InstRW<[THX3T110Write_1Cyc_I23], (instrs B, BL, BR, BLR)>; +def : InstRW<[THX3T110Write_1Cyc_I23], (instrs Bcc)>; +def : InstRW<[THX3T110Write_1Cyc_I23], (instrs RET)>; +def : InstRW<[THX3T110Write_1Cyc_I23], + (instrs CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>; + +//--- +// 3.2 Arithmetic and Logical Instructions +// 3.3 Move and Shift Instructions +//--- + + +// ALU, basic +// Conditional compare +// Conditional select +// Address generation +def : WriteRes { + let Latency = 1; + let ResourceCycles = [1]; + let NumMicroOps = 2; +} + +def : InstRW<[WriteI], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC(W|X)r", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", + "SBCS(W|X)r", "CCMN(W|X)(i|r)", + "CCMP(W|X)(i|r)", "CSEL(W|X)r", + "CSINC(W|X)r", "CSINV(W|X)r", + "CSNEG(W|X)r")>; + +def : InstRW<[WriteI], (instrs COPY)>; + +// ALU, extend and/or shift +def : WriteRes { + let Latency = 2; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} + +def : InstRW<[WriteISReg], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC(W|X)r", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", + "SBCS(W|X)r", "CCMN(W|X)(i|r)", + "CCMP(W|X)(i|r)", "CSEL(W|X)r", + "CSINC(W|X)r", "CSINV(W|X)r", + "CSNEG(W|X)r")>; + +def : WriteRes { + let Latency = 1; + let ResourceCycles = [1]; + let NumMicroOps = 2; +} + +def : InstRW<[WriteIEReg], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC(W|X)r", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r", + "SBCS(W|X)r", "CCMN(W|X)(i|r)", + "CCMP(W|X)(i|r)", "CSEL(W|X)r", + "CSINC(W|X)r", "CSINV(W|X)r", + "CSNEG(W|X)r")>; + +// Move immed +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +def : InstRW<[THX3T110Write_1Cyc_I0123], + (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>; + +def : InstRW<[THX3T110Write_1Cyc_I0123], + (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>; + +// Variable shift +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +//--- +// 3.4 Divide and Multiply Instructions +//--- + +// Divide, W-form +// Latency range of 13-23/13-39. +def : WriteRes { + let Latency = 39; + let ResourceCycles = [39]; + let NumMicroOps = 4; +} + +// Divide, X-form +def : WriteRes { + let Latency = 23; + let ResourceCycles = [23]; + let NumMicroOps = 4; +} + +// Multiply accumulate, W-form +def : WriteRes { + let Latency = 5; + let NumMicroOps = 3; +} + +// Multiply accumulate, X-form +def : WriteRes { + let Latency = 5; + let NumMicroOps = 3; +} + +//def : InstRW<[WriteIM32, ReadIM, ReadIM, ReadIMA, THX3T110Write_5Cyc_I012], +// (instrs MADDWrrr, MSUBWrrr)>; +def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>; +def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>; +def : InstRW<[THX3T110Write_5Cyc_I0123], + (instregex "(S|U)(MADDL|MSUBL)rrr")>; + +def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>; +def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>; + +// Bitfield extract, two reg +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +// Multiply high +def : InstRW<[THX3T110Write_4Cyc_I1], (instrs SMULHrr, UMULHrr)>; + +// Miscellaneous Data-Processing Instructions +// Bitfield extract +def : InstRW<[THX3T110Write_1Cyc_I0123], (instrs EXTRWrri, EXTRXrri)>; + +// Bitifield move - basic +def : InstRW<[THX3T110Write_1Cyc_I0123], + (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>; + +// Bitfield move, insert +def : InstRW<[THX3T110Write_1Cyc_I0123], (instregex "^BFM")>; +def : InstRW<[THX3T110Write_1Cyc_I0123], (instregex "(S|U)?BFM.*")>; + +// Count leading +def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], + (instregex "^CLS(W|X)r$", "^CLZ(W|X)r$")>; + +// Reverse bits +def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instrs RBITWr, RBITXr)>; + +// Cryptography Extensions +def : InstRW<[THX3T110Write_4Cyc_F0123], (instregex "^AES[DE]")>; +def : InstRW<[THX3T110Write_4Cyc_F0123], (instregex "^AESI?MC")>; +def : InstRW<[THX3T110Write_4Cyc_F0123], (instregex "^PMULL")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA1SU0")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA1(H|SU1)")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA1[CMP]")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA256SU0")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SHA256(H|H2|SU1)")>; + +// CRC Instructions +// def : InstRW<[THX3T110Write_4Cyc_I1], (instregex "^CRC32", "^CRC32C")>; +def : InstRW<[THX3T110Write_4Cyc_I1], + (instrs CRC32Brr, CRC32Hrr, CRC32Wrr, CRC32Xrr)>; + +def : InstRW<[THX3T110Write_4Cyc_I1], + (instrs CRC32CBrr, CRC32CHrr, CRC32CWrr, CRC32CXrr)>; + +// Reverse bits/bytes +// NOTE: Handled by WriteI. + +//--- +// 3.6 Load Instructions +// 3.10 FP Load Instructions +//--- + +// Load register, literal +// Load register, unscaled immed +// Load register, immed unprivileged +// Load register, unsigned immed +def : WriteRes { + let Latency = 4; + let NumMicroOps = 4; +} + +// Load register, immed post-index +// NOTE: Handled by WriteLD, WriteI. +// Load register, immed pre-index +// NOTE: Handled by WriteLD, WriteAdr. +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +// Load pair, immed offset, normal +// Load pair, immed offset, signed words, base != SP +// Load pair, immed offset signed words, base = SP +// LDP only breaks into *one* LS micro-op. Thus +// the resources are handled by WriteLD. +def : WriteRes { + let Latency = 4; + let NumMicroOps = 4; +} + +// Load register offset, basic +// Load register, register offset, scale by 4/8 +// Load register, register offset, scale by 2 +// Load register offset, extend +// Load register, register offset, extend, scale by 4/8 +// Load register, register offset, extend, scale by 2 +def THX3T110WriteLDIdx : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def : SchedAlias; + +def THX3T110ReadAdrBase : SchedReadVariant<[ + SchedVar, + SchedVar]>; +def : SchedAlias; + +// Load pair, immed pre-index, normal +// Load pair, immed pre-index, signed words +// Load pair, immed post-index, normal +// Load pair, immed post-index, signed words +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPDi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPQi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPSi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPWi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDNPXi)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPDi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPQi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPSi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPSWi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPWi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, WriteLDHi], (instrs LDPXi)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRBui)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRDui)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRHui)>; +def : InstRW<[THX3T110Write_5Cyc_LS01], (instrs LDRQui)>; +def : InstRW<[THX3T110Write_5Cyc_LS01], (instrs LDRSui)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRDl)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRQl)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRWl)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDRXl)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRBi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRHi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRWi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRXi)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSBWi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSBXi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSHWi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSHXi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDTRSWi)>; + +def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr], + (instrs LDPDpre)>; +def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr], + (instrs LDPQpre)>; +def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr], + (instrs LDPSpre)>; +def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; +def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01, WriteAdr], + (instrs LDRBpre, LDRDpre, LDRHpre, LDRQpre, + LDRSpre, LDRWpre, LDRXpre, + LDRSBWpre, LDRSBXpre, LDRSBWpost, LDRSBXpost, + LDRSHWpre, LDRSHXpre, LDRSHWpost, LDRSHXpost, + LDRBBpre, LDRBBpost, LDRHHpre, LDRHHpost)>; + +def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteLDHi, WriteAdr], + (instrs LDPDpost, LDPQpost, LDPSpost, LDPWpost, LDPXpost)>; + +def : InstRW<[THX3T110Write_5Cyc_LS01_I0123, WriteI], + (instrs LDRBpost, LDRDpost, LDRHpost, + LDRQpost, LDRSpost, LDRWpost, LDRXpost)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123_I0123, WriteLDHi, WriteAdr], + (instrs LDPDpre, LDPQpre, LDPSpre, LDPWpre, LDPXpre)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123_I0123, WriteAdr], + (instrs LDRBpre, LDRDpre, LDRHpre, LDRQpre, + LDRSpre, LDRWpre, LDRXpre)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123_I0123, WriteLDHi, WriteAdr], + (instrs LDPDpost, LDPQpost, LDPSpost, LDPWpost, LDPXpost)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123_I0123, WriteI], + (instrs LDRBpost, LDRDpost, LDRHpost, LDRQpost, + LDRSpost, LDRWpost, LDRXpost)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRBroW)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRDroW)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRHroW)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRHHroW)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRQroW)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSroW)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSHWroW)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSHXroW)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRWroW)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRXroW)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRBroX)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRDroX)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRHHroX)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRHroX)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRQroX)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSroX)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSHWroX)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRSHXroX)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRWroX)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_I0123, ReadAdrBase], (instrs LDRXroX)>; + +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURBi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURBBi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURDi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURHi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURHHi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURQi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURXi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSBWi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSBXi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSHWi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSHXi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instrs LDURSWi)>; + +// Load exclusive +def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDAR(B|H|W|X)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDAXR(B|H|W|X)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDXR(B|H|W|X)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDAXP(W|X)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01], (instregex "^LDXP(W|X)$")>; + +//--- +// Prefetch +//--- +def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFMl)>; +def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFUMi)>; +def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFMui)>; +def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFMroW)>; +def : InstRW<[THX3T110Write_6Cyc_LS01_I012], (instrs PRFMroX)>; + +//-- +// 3.7 Store Instructions +// 3.11 FP Store Instructions +//-- + +// Store register, unscaled immed +// Store register, immed unprivileged +// Store register, unsigned immed +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +// Store register, immed post-index +// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase + +// Store register, immed pre-index +// NOTE: Handled by WriteAdr, WriteST + +// Store register, register offset, basic +// Store register, register offset, scaled by 4/8 +// Store register, register offset, scaled by 2 +// Store register, register offset, extend +// Store register, register offset, extend, scale by 4/8 +// Store register, register offset, extend, scale by 1 +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +// Store pair, immed offset, W-form +// Store pair, immed offset, X-form +def : WriteRes { + let Latency = 1; + let NumMicroOps = 2; +} + +// Store pair, immed post-index, W-form +// Store pair, immed post-index, X-form +// Store pair, immed pre-index, W-form +// Store pair, immed pre-index, X-form +// NOTE: Handled by WriteAdr, WriteSTP. +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURBi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURBBi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURDi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURHi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURHHi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURQi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURSi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURWi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STURXi)>; + +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_SD], (instrs STTRBi)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_SD], (instrs STTRHi)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_SD], (instrs STTRWi)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_SD], (instrs STTRXi)>; + +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STNPDi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STNPQi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STNPXi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STNPWi)>; + +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STPDi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STPQi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STPXi)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_SD], (instrs STPWi)>; + +def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRBui)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRDui)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRHui)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRQui)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRXui)>; +def : InstRW<[THX3T110Write_1Cyc_LS01_I0123], (instrs STRWui)>; + +def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRBui)>; +def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRDui)>; +def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRHui)>; +def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRQui)>; +def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRXui)>; +def : InstRW<[WriteSTP, THX3T110Write_1Cyc_LS01_SD], (instrs STRWui)>; + +def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRBui)>; +def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRDui)>; +def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRHui)>; +def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRQui)>; +def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRXui)>; +def : InstRW<[WriteSTIdx, THX3T110Write_1Cyc_LS01_SD_I0123], (instrs STRWui)>; + +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STPXpre, STPXpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STPXpre, STPXpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123], + (instrs STRXpre, STRXpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRXpre, STRXpost)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRBroW, STRBroX)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRBBroW, STRBBroX)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRDroW, STRDroX)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRHroW, STRHroX)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRHHroW, STRHHroX)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRQroW, STRQroX)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRSroW, STRSroX)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRWroW, STRWroX)>; +def : InstRW<[WriteAdr, THX3T110Write_1Cyc_LS01_I0123, ReadAdrBase], + (instrs STRXroW, STRXroX)>; + +// Store exclusive +def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instrs STNPWi, STNPXi)>; +def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STLR(B|H|W|X)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STXP(W|X)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STXR(B|H|W|X)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STLXP(W|X)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01_SD], (instregex "^STLXR(B|H|W|X)$")>; + +//--- +// 3.8 FP Data Processing Instructions +//--- + +// FP absolute value +// FP min/max +// FP negate +def : WriteRes { + let Latency = 5; + let NumMicroOps = 2; +} + +// FP arithmetic +def : InstRW<[THX3T110Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>; + +// FP compare +def : WriteRes { + let Latency = 5; + let NumMicroOps = 2; +} + +// FP Mul, Div, Sqrt +def : WriteRes { + let Latency = 22; + let ResourceCycles = [19]; +} + +def THX3T110XWriteFDiv : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 4; +} + +def THX3T110XWriteFDivSP : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 4; +} + +def THX3T110XWriteFDivDP : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 23; + let ResourceCycles = [12]; + let NumMicroOps = 4; +} + +def THX3T110XWriteFSqrtSP : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 4; +} + +def THX3T110XWriteFSqrtDP : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 23; + let ResourceCycles = [12]; + let NumMicroOps = 4; +} + +// FP divide, S-form +// FP square root, S-form +def : InstRW<[THX3T110XWriteFDivSP], (instrs FDIVSrr)>; +def : InstRW<[THX3T110XWriteFSqrtSP], (instrs FSQRTSr)>; +def : InstRW<[THX3T110XWriteFDivSP], (instregex "^FDIVv.*32$")>; +def : InstRW<[THX3T110XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>; +def : InstRW<[THX3T110Write_16Cyc_F01], (instregex "^FDIVSrr", "^FSQRTSr")>; + +// FP divide, D-form +// FP square root, D-form +def : InstRW<[THX3T110XWriteFDivDP], (instrs FDIVDrr)>; +def : InstRW<[THX3T110XWriteFSqrtDP], (instrs FSQRTDr)>; +def : InstRW<[THX3T110XWriteFDivDP], (instregex "^FDIVv.*64$")>; +def : InstRW<[THX3T110XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>; +def : InstRW<[THX3T110Write_23Cyc_F01], (instregex "^FDIVDrr", "^FSQRTDr")>; + +// FP multiply +// FP multiply accumulate +def : WriteRes { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def THX3T110XWriteFMul : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def THX3T110XWriteFMulAcc : SchedWriteRes<[THX3T110FP0123]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def : InstRW<[THX3T110XWriteFMul], (instregex "^FMUL", "^FNMUL")>; +def : InstRW<[THX3T110XWriteFMulAcc], + (instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>; + +// FP round to integral +def : InstRW<[THX3T110Write_7Cyc_F01], + (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>; + +// FP select +def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instregex "^FCSEL")>; + +//--- +// 3.9 FP Miscellaneous Instructions +//--- + +// FP convert, from vec to vec reg +// FP convert, from gen to vec reg +// FP convert, from vec to gen reg +def : WriteRes { + let Latency = 7; + let NumMicroOps = 3; +} + +// FP move, immed +// FP move, register +def : WriteRes { + let Latency = 4; + let NumMicroOps = 2; +} + +// FP transfer, from gen to vec reg +// FP transfer, from vec to gen reg +def : WriteRes { + let Latency = 4; + let NumMicroOps = 2; +} + +def : InstRW<[THX3T110Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>; + +//--- +// 3.12 ASIMD Integer Instructions +//--- + +// ASIMD absolute diff, D-form +// ASIMD absolute diff, Q-form +// ASIMD absolute diff accum, D-form +// ASIMD absolute diff accum, Q-form +// ASIMD absolute diff accum long +// ASIMD absolute diff long +// ASIMD arith, basic +// ASIMD arith, complex +// ASIMD compare +// ASIMD logical (AND, BIC, EOR) +// ASIMD max/min, basic +// ASIMD max/min, reduce, 4H/4S +// ASIMD max/min, reduce, 8B/8H +// ASIMD max/min, reduce, 16B +// ASIMD multiply, D-form +// ASIMD multiply, Q-form +// ASIMD multiply accumulate long +// ASIMD multiply accumulate saturating long +// ASIMD multiply long +// ASIMD pairwise add and accumulate +// ASIMD shift accumulate +// ASIMD shift by immed, basic +// ASIMD shift by immed and insert, basic, D-form +// ASIMD shift by immed and insert, basic, Q-form +// ASIMD shift by immed, complex +// ASIMD shift by register, basic, D-form +// ASIMD shift by register, basic, Q-form +// ASIMD shift by register, complex, D-form +// ASIMD shift by register, complex, Q-form +def : WriteRes { + let Latency = 5; + let NumMicroOps = 4; + let ResourceCycles = [4]; +} + +// ASIMD arith, reduce, 4H/4S +// ASIMD arith, reduce, 8B/8H +// ASIMD arith, reduce, 16B + +// ASIMD logical (MVN (alias for NOT), ORN, ORR) +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>; + +// ASIMD arith, reduce +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>; + +// ASIMD polynomial (8x8) multiply long +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^(S|U|SQD)MULL")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^PMULL(v8i8|v16i8)")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^PMULL(v1i64|v2i64)")>; + +// ASIMD absolute diff accum, D-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>; +// ASIMD absolute diff accum, Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>; +// ASIMD absolute diff accum long +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU]ABAL")>; +// ASIMD arith, reduce, 4H/4S +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; +// ASIMD arith, reduce, 8B +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>; +// ASIMD arith, reduce, 16B/16H +def : InstRW<[THX3T110Write_10Cyc_F0123], + (instregex "^[SU]?ADDL?Vv16i8v$")>; +// ASIMD max/min, reduce, 4H/4S +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>; +// ASIMD max/min, reduce, 8B/8H +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>; +// ASIMD max/min, reduce, 16B/16H +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU](MIN|MAX)Vv16i8v$")>; +// ASIMD multiply, D-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^(P?MUL|SQR?DMULH)" # + "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" # + "(_indexed)?$")>; +// ASIMD multiply, Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; +// ASIMD multiply accumulate, D-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>; +// ASIMD multiply accumulate, Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>; +// ASIMD shift accumulate +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>; + +// ASIMD shift by immed, basic +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "RSHRNv","SHRNv", "SQRSHRNv","SQRSHRUNv", + "SQSHRNv","SQSHRUNv", "UQRSHRNv", + "UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>; +// ASIMD shift by immed, complex +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^[SU]?(Q|R){1,2}SHR")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SQSHLU")>; +// ASIMD shift by register, basic, Q-form +def : InstRW<[THX3T110Write_5Cyc_F01], + (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; +// ASIMD shift by register, complex, D-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU][QR]{1,2}SHL" # + "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>; +// ASIMD shift by register, complex, Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>; + +// ASIMD Arithmetic +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "(ADD|SUB)HNv.*")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "(RADD|RSUB)HNv.*")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD", + "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" # + "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SADALP","^UADALP")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SADDLPv","^UADDLPv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SADDLV","^UADDLV")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^ADDVv","^SMAXVv","^UMAXVv","^SMINVv","^UMINVv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^SABAv","^UABAv","^SABALv","^UABALv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^SQADDv","^SQSUBv","^UQADDv","^UQSUBv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^SUQADDv","^USQADDv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^ADDHNv","^RADDHNv", "^RSUBHNv", + "^SQABS", "^SQADD", "^SQNEG", "^SQSUB", + "^SRHADD", "^SUBHNv", "^SUQADD", + "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^CMEQv","^CMGEv","^CMGTv", + "^CMLEv","^CMLTv", "^CMHIv","^CMHSv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^SMAXv","^SMINv","^UMAXv","^UMINv", + "^SMAXPv","^SMINPv","^UMAXPv","^UMINPv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^SABDv","^UABDv", "^SABDLv","^UABDLv")>; + +//--- +// 3.13 ASIMD Floating-point Instructions +//--- + +// ASIMD FP absolute value +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FABSv")>; + +// ASIMD FP arith, normal, D-form +// ASIMD FP arith, normal, Q-form +def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], + (instregex "^FABDv", "^FADDv", "^FSUBv")>; + +// ASIMD FP arith,pairwise, D-form +// ASIMD FP arith, pairwise, Q-form +def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instregex "^FADDPv")>; + +// ASIMD FP compare, D-form +// ASIMD FP compare, Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FACGEv", "^FACGTv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FCMEQv", "^FCMGEv", + "^FCMGTv", "^FCMLEv", + "^FCMLTv")>; + +// ASIMD FP round, D-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FRINT[AIMNPXZ](v2f32)")>; +// ASIMD FP round, Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>; + +// ASIMD FP convert, long +// ASIMD FP convert, narrow +// ASIMD FP convert, other, D-form +// ASIMD FP convert, other, Q-form +// NOTE: Handled by WriteV. + +// ASIMD FP convert, long and narrow +def : InstRW<[THX3T110Write_5Cyc_F01], (instregex "^FCVT(L|N|XN)v")>; +// ASIMD FP convert, other, D-form +def : InstRW<[THX3T110Write_5Cyc_F01], + (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>; +// ASIMD FP convert, other, Q-form +def : InstRW<[THX3T110Write_5Cyc_F01], + (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP divide, D-form, F32 +def : InstRW<[THX3T110Write_16Cyc_F0123], (instrs FDIVv2f32)>; +def : InstRW<[THX3T110Write_16Cyc_F0123], (instregex "FDIVv2f32")>; + +// ASIMD FP divide, Q-form, F32 +def : InstRW<[THX3T110Write_16Cyc_F0123], (instrs FDIVv4f32)>; +def : InstRW<[THX3T110Write_16Cyc_F0123], (instregex "FDIVv4f32")>; + +// ASIMD FP divide, Q-form, F64 +def : InstRW<[THX3T110Write_23Cyc_F0123], (instrs FDIVv2f64)>; +def : InstRW<[THX3T110Write_23Cyc_F0123], (instregex "FDIVv2f64")>; + +// ASIMD FP max/min, normal, D-form +// ASIMD FP max/min, normal, Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FMAXv", "^FMAXNMv", + "^FMINv", "^FMINNMv")>; + +// ASIMD FP max/min, pairwise, D-form +// ASIMD FP max/min, pairwise, Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FMAXPv", "^FMAXNMPv", + "^FMINPv", "^FMINNMPv")>; + +// ASIMD FP max/min, reduce +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FMAXVv", "^FMAXNMVv", + "^FMINVv", "^FMINNMVv")>; + +// ASIMD FP multiply, D-form, FZ +// ASIMD FP multiply, D-form, no FZ +// ASIMD FP multiply, Q-form, FZ +// ASIMD FP multiply, Q-form, no FZ +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FMULv", "^FMULXv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP multiply accumulate, Dform, FZ +// ASIMD FP multiply accumulate, Dform, no FZ +// ASIMD FP multiply accumulate, Qform, FZ +// ASIMD FP multiply accumulate, Qform, no FZ +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FMLAv", "^FMLSv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>; + +// ASIMD FP negate +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^FNEGv")>; + +//-- +// 3.14 ASIMD Miscellaneous Instructions +//-- + +// ASIMD bit reverse +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^RBITv")>; + +// ASIMD bitwise insert, D-form +// ASIMD bitwise insert, Q-form +def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], + (instregex "^BIFv", "^BITv", "^BSLv")>; + +// ASIMD count, D-form +// ASIMD count, Q-form +def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], + (instregex "^CLSv", "^CLZv", "^CNTv")>; + +// ASIMD duplicate, gen reg +// ASIMD duplicate, element +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^DUPv")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^CPY")>; +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^DUPv.+gpr")>; + +// ASIMD extract +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^EXTv")>; + +// ASIMD extract narrow +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^XTNv")>; + +// ASIMD extract narrow, saturating +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>; + +// ASIMD insert, element to element +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^INSv")>; + +// ASIMD transfer, element to gen reg +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^[SU]MOVv")>; + +// ASIMD move, integer immed +def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instregex "^MOVIv")>; + +// ASIMD move, FP immed +def : InstRW<[THX3T110Write_3_4Cyc_F23_F0123], (instregex "^FMOVv")>; + +// ASIMD transpose +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^TRN1", "^TRN2")>; + +// ASIMD unzip/zip +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>; + +// ASIMD reciprocal estimate, D-form +// ASIMD reciprocal estimate, Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FRECPEv", "^FRECPXv", "^URECPEv", + "^FRSQRTEv", "^URSQRTEv")>; + +// ASIMD reciprocal step, D-form, FZ +// ASIMD reciprocal step, D-form, no FZ +// ASIMD reciprocal step, Q-form, FZ +// ASIMD reciprocal step, Q-form, no FZ +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^FRECPSv", "^FRSQRTSv")>; + +// ASIMD reverse +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^REV16v", "^REV32v", "^REV64v")>; + +// ASIMD table lookup, D-form +// ASIMD table lookup, Q-form +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instrs TBLv8i8One, TBLv16i8One, TBXv8i8One, TBXv16i8One)>; +def : InstRW<[THX3T110Write_10Cyc_F0123], + (instrs TBLv8i8Two, TBLv16i8Two, TBXv8i8Two, TBXv16i8Two)>; +def : InstRW<[THX3T110Write_15Cyc_F0123], + (instrs TBLv8i8Three, TBLv16i8Three, TBXv8i8Three, TBXv16i8Three)>; +def : InstRW<[THX3T110Write_20Cyc_F0123], + (instrs TBLv8i8Four, TBLv16i8Four, TBXv8i8Four, TBXv16i8Four)>; + +// ASIMD transfer, element to word or word +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^[SU]MOVv")>; + +// ASIMD transfer, element to gen reg +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "(S|U)MOVv.*")>; + +// ASIMD transfer gen reg to element +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^INSv")>; + +// ASIMD transpose +def : InstRW<[THX3T110Write_5Cyc_F0123], + (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>; + +// ASIMD unzip/zip +def : InstRW<[THX3T110Write_5Cyc_F0123], (instregex "^ZIP1v", "^ZIP2v")>; + +//-- +// 3.15 ASIMD Load Instructions +//-- + +// ASIMD load, 1 element, multiple, 1 reg, D-form +// ASIMD load, 1 element, multiple, 1 reg, Q-form +def : InstRW<[THX3T110Write_4Cyc_LS01], + (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01, WriteAdr], + (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 2 reg, D-form +// ASIMD load, 1 element, multiple, 2 reg, Q-form +def : InstRW<[THX3T110Write_4Cyc_LS01], + (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_4Cyc_LS01, WriteAdr], + (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 3 reg, D-form +// ASIMD load, 1 element, multiple, 3 reg, Q-form +def : InstRW<[THX3T110Write_5Cyc_LS01], + (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_5Cyc_LS01, WriteAdr], + (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, multiple, 4 reg, D-form +// ASIMD load, 1 element, multiple, 4 reg, Q-form +def : InstRW<[THX3T110Write_6Cyc_LS01], + (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_6Cyc_LS01, WriteAdr], + (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 1 element, one lane, B/H/S +// ASIMD load, 1 element, one lane, D +def : InstRW<[THX3T110Write_5Cyc_LS01_F0123], + (instregex "^LD1i(8|16|32|64)$")>; +def : InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr], + (instregex "^LD1i(8|16|32|64)_POST$")>; + +// ASIMD load, 1 element, all lanes, D-form, B/H/S +// ASIMD load, 1 element, all lanes, D-form, D +// ASIMD load, 1 element, all lanes, Q-form +def : InstRW<[THX3T110Write_5Cyc_LS01_F0123], + (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr], + (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 2 element, multiple, D-form, B/H/S +// ASIMD load, 2 element, multiple, Q-form, D +def : InstRW<[THX3T110Write_5Cyc_LS01_F0123], + (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr], + (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 2 element, one lane, B/H +// ASIMD load, 2 element, one lane, S +// ASIMD load, 2 element, one lane, D +def : InstRW<[THX3T110Write_5Cyc_LS01_F0123], + (instregex "^LD2i(8|16|32|64)$")>; +def : InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr], + (instregex "^LD2i(8|16|32|64)_POST$")>; + +// ASIMD load, 2 element, all lanes, D-form, B/H/S +// ASIMD load, 2 element, all lanes, D-form, D +// ASIMD load, 2 element, all lanes, Q-form +def : InstRW<[THX3T110Write_5Cyc_LS01_F0123], + (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_5Cyc_LS01_F0123, WriteAdr], + (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 3 element, multiple, D-form, B/H/S +// ASIMD load, 3 element, multiple, Q-form, B/H/S +// ASIMD load, 3 element, multiple, Q-form, D +def : InstRW<[THX3T110Write_8Cyc_LS01_F0123], + (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_8Cyc_LS01_F0123, WriteAdr], + (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 3 element, one lone, B/H +// ASIMD load, 3 element, one lane, S +// ASIMD load, 3 element, one lane, D +def : InstRW<[THX3T110Write_7Cyc_LS01_F0123], + (instregex "^LD3i(8|16|32|64)$")>; +def : InstRW<[THX3T110Write_7Cyc_LS01_F0123, WriteAdr], + (instregex "^LD3i(8|16|32|64)_POST$")>; + +// ASIMD load, 3 element, all lanes, D-form, B/H/S +// ASIMD load, 3 element, all lanes, D-form, D +// ASIMD load, 3 element, all lanes, Q-form, B/H/S +// ASIMD load, 3 element, all lanes, Q-form, D +def : InstRW<[THX3T110Write_7Cyc_LS01_F0123], + (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_7Cyc_LS01_F0123, WriteAdr], + (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 4 element, multiple, D-form, B/H/S +// ASIMD load, 4 element, multiple, Q-form, B/H/S +// ASIMD load, 4 element, multiple, Q-form, D +def : InstRW<[THX3T110Write_8Cyc_LS01_F0123], + (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_8Cyc_LS01_F0123, WriteAdr], + (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD load, 4 element, one lane, B/H +// ASIMD load, 4 element, one lane, S +// ASIMD load, 4 element, one lane, D +def : InstRW<[THX3T110Write_6Cyc_LS01_F0123], + (instregex "^LD4i(8|16|32|64)$")>; +def : InstRW<[THX3T110Write_6Cyc_LS01_F0123, WriteAdr], + (instregex "^LD4i(8|16|32|64)_POST$")>; + +// ASIMD load, 4 element, all lanes, D-form, B/H/S +// ASIMD load, 4 element, all lanes, D-form, D +// ASIMD load, 4 element, all lanes, Q-form, B/H/S +// ASIMD load, 4 element, all lanes, Q-form, D +def : InstRW<[THX3T110Write_6Cyc_LS01_F0123], + (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_6Cyc_LS01_F0123, WriteAdr], + (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +//-- +// 3.16 ASIMD Store Instructions +//-- + +// ASIMD store, 1 element, multiple, 1 reg, D-form +// ASIMD store, 1 element, multiple, 1 reg, Q-form +def : InstRW<[THX3T110Write_1Cyc_LS01], + (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 2 reg, D-form +// ASIMD store, 1 element, multiple, 2 reg, Q-form +def : InstRW<[THX3T110Write_1Cyc_LS01], + (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 3 reg, D-form +// ASIMD store, 1 element, multiple, 3 reg, Q-form +def : InstRW<[THX3T110Write_1Cyc_LS01], + (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, multiple, 4 reg, D-form +// ASIMD store, 1 element, multiple, 4 reg, Q-form +def : InstRW<[THX3T110Write_1Cyc_LS01], + (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01, WriteAdr], + (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 1 element, one lane, B/H/S +// ASIMD store, 1 element, one lane, D +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], + (instregex "^ST1i(8|16|32|64)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], + (instregex "^ST1i(8|16|32|64)_POST$")>; + +// ASIMD store, 2 element, multiple, D-form, B/H/S +// ASIMD store, 2 element, multiple, Q-form, B/H/S +// ASIMD store, 2 element, multiple, Q-form, D +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], + (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], + (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 2 element, one lane, B/H/S +// ASIMD store, 2 element, one lane, D +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], + (instregex "^ST2i(8|16|32|64)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], + (instregex "^ST2i(8|16|32|64)_POST$")>; + +// ASIMD store, 3 element, multiple, D-form, B/H/S +// ASIMD store, 3 element, multiple, Q-form, B/H/S +// ASIMD store, 3 element, multiple, Q-form, D +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], + (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], + (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 3 element, one lane, B/H +// ASIMD store, 3 element, one lane, S +// ASIMD store, 3 element, one lane, D +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], + (instregex "^ST3i(8|16|32|64)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], + (instregex "^ST3i(8|16|32|64)_POST$")>; + +// ASIMD store, 4 element, multiple, D-form, B/H/S +// ASIMD store, 4 element, multiple, Q-form, B/H/S +// ASIMD store, 4 element, multiple, Q-form, D +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], + (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], + (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; + +// ASIMD store, 4 element, one lane, B/H +// ASIMD store, 4 element, one lane, S +// ASIMD store, 4 element, one lane, D +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123], + (instregex "^ST4i(8|16|32|64)$")>; +def : InstRW<[THX3T110Write_1Cyc_LS01_F0123, WriteAdr], + (instregex "^ST4i(8|16|32|64)_POST$")>; + +// V8.1a Atomics (LSE) +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs CASB, CASH, CASW, CASX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs CASAB, CASAH, CASAW, CASAX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs CASLB, CASLH, CASLW, CASLX)>; + +def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic], + (instrs CASALB, CASALH, CASALW, CASALX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs LDLARB, LDLARH, LDLARW, LDLARX)>; + +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs LDADDB, LDADDH, LDADDW, LDADDX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>; + +def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic], + (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>; + +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>; + +def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic], + (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>; + +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs LDEORB, LDEORH, LDEORW, LDEORX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs LDEORAB, LDEORAH, LDEORAW, LDEORAX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs LDEORLB, LDEORLH, LDEORLW, LDEORLX)>; + +def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic], + (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>; + +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs LDSETB, LDSETH, LDSETW, LDSETX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>; + +def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic], + (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>; + +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX, + LDSMAXAB, LDSMAXAH, LDSMAXAW, LDSMAXAX, + LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX, + LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>; + +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX, + LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX, + LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX, + LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>; + +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX, + LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX, + LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX, + LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>; + +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX, + LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX, + LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX, + LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>; + +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs SWPB, SWPH, SWPW, SWPX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs SWPAB, SWPAH, SWPAW, SWPAX)>; + +def : InstRW<[THX3T110Write_6Cyc_I0123, WriteAtomic], + (instrs SWPLB, SWPLH, SWPLW, SWPLX)>; + +def : InstRW<[THX3T110Write_8Cyc_I0123, WriteAtomic], + (instrs SWPALB, SWPALH, SWPALW, SWPALX)>; + +def : InstRW<[THX3T110Write_4Cyc_I0123, WriteAtomic], + (instrs STLLRB, STLLRH, STLLRW, STLLRX)>; + +// V8.3a PAC +def : InstRW<[THX3T110Write_11Cyc_LS01_I1], (instregex "^LDRAA", "^LDRAB")>; +def : InstRW<[THX3T110Write_8Cyc_I123], + (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, + BRAA, BRAAZ, BRAB, BRABZ)>; +def : InstRW<[THX3T110Write_8Cyc_I123], (instrs RETAA, RETAB)>; + +} // SchedModel = ThunderX3T110Model diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 3636d8d2b628..079e8f1764dc 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -160,6 +160,17 @@ void AArch64Subtarget::initializeProperties() { PrefFunctionLogAlignment = 4; PrefLoopLogAlignment = 2; break; + case ThunderX3T110: + CacheLineSize = 64; + PrefFunctionLogAlignment = 4; + PrefLoopLogAlignment = 2; + MaxInterleaveFactor = 4; + PrefetchDistance = 128; + MinPrefetchStride = 1024; + MaxPrefetchIterationsAhead = 4; + // FIXME: remove this to enable 64-bit SLP if performance looks good. + MinVectorRegisterBitWidth = 128; + break; } } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h index 79c2c161d3cb..40eb67a153e4 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -63,7 +63,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { ThunderXT81, ThunderXT83, ThunderXT88, - TSV110 + TSV110, + ThunderX3T110 }; protected: diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp index 39d16e7999cd..1ac291fcb887 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -404,7 +404,7 @@ void X86AsmPrinter::PrintIntelMemReference(const MachineInstr *MI, static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO, char Mode, raw_ostream &O) { Register Reg = MO.getReg(); - bool EmitPercent = true; + bool EmitPercent = MO.getParent()->getInlineAsmDialect() == InlineAsm::AD_ATT; if (!X86::GR8RegClass.contains(Reg) && !X86::GR16RegClass.contains(Reg) && @@ -443,6 +443,42 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO, return false; } +static bool printAsmVRegister(X86AsmPrinter &P, const MachineOperand &MO, + char Mode, raw_ostream &O) { + unsigned Reg = MO.getReg(); + bool EmitPercent = MO.getParent()->getInlineAsmDialect() == InlineAsm::AD_ATT; + + unsigned Index; + if (X86::VR128XRegClass.contains(Reg)) + Index = Reg - X86::XMM0; + else if (X86::VR256XRegClass.contains(Reg)) + Index = Reg - X86::YMM0; + else if (X86::VR512RegClass.contains(Reg)) + Index = Reg - X86::ZMM0; + else + return true; + + switch (Mode) { + default: // Unknown mode. + return true; + case 'x': // Print V4SFmode register + Reg = X86::XMM0 + Index; + break; + case 't': // Print V8SFmode register + Reg = X86::YMM0 + Index; + break; + case 'g': // Print V16SFmode register + Reg = X86::ZMM0 + Index; + break; + } + + if (EmitPercent) + O << '%'; + + O << X86ATTInstPrinter::getRegisterName(Reg); + return false; +} + /// PrintAsmOperand - Print out an operand for an inline asm expression. /// bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -517,6 +553,14 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, PrintOperand(MI, OpNo, O); return false; + case 'x': // Print V4SFmode register + case 't': // Print V8SFmode register + case 'g': // Print V16SFmode register + if (MO.isReg()) + return printAsmVRegister(*this, MO, ExtraCode[0], O); + PrintOperand(MI, OpNo, O); + return false; + case 'P': // This is the operand of a call, treat specially. PrintPCRelImm(MI, OpNo, O); return false; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp index 60eefbc677da..1523d56cc4e7 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -23319,7 +23319,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, for (unsigned i = 0; i != NumElts; ++i) { SDValue CurrentOp = SrcOp->getOperand(i); if (CurrentOp->isUndef()) { - Elts.push_back(CurrentOp); + // Must produce 0s in the correct bits. + Elts.push_back(DAG.getConstant(0, dl, ElementType)); continue; } auto *ND = cast(CurrentOp); @@ -23331,7 +23332,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, for (unsigned i = 0; i != NumElts; ++i) { SDValue CurrentOp = SrcOp->getOperand(i); if (CurrentOp->isUndef()) { - Elts.push_back(CurrentOp); + // Must produce 0s in the correct bits. + Elts.push_back(DAG.getConstant(0, dl, ElementType)); continue; } auto *ND = cast(CurrentOp); @@ -23343,7 +23345,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, for (unsigned i = 0; i != NumElts; ++i) { SDValue CurrentOp = SrcOp->getOperand(i); if (CurrentOp->isUndef()) { - Elts.push_back(CurrentOp); + // All shifted in bits must be the same so use 0. + Elts.push_back(DAG.getConstant(0, dl, ElementType)); continue; } auto *ND = cast(CurrentOp); @@ -39699,14 +39702,22 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG, getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) { assert(EltBits.size() == VT.getVectorNumElements() && "Unexpected shift value type"); - for (APInt &Elt : EltBits) { - if (X86ISD::VSHLI == Opcode) + // Undef elements need to fold to 0. It's possible SimplifyDemandedBits + // created an undef input due to no input bits being demanded, but user + // still expects 0 in other bits. + for (unsigned i = 0, e = EltBits.size(); i != e; ++i) { + APInt &Elt = EltBits[i]; + if (UndefElts[i]) + Elt = 0; + else if (X86ISD::VSHLI == Opcode) Elt <<= ShiftVal; else if (X86ISD::VSRAI == Opcode) Elt.ashrInPlace(ShiftVal); else Elt.lshrInPlace(ShiftVal); } + // Reset undef elements since they were zeroed above. + UndefElts = 0; return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N)); } diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp index 245346d82731..90484241c28c 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3956,6 +3956,8 @@ static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB, BuildMI(MBB, I, DL, TII.get(X86::PUSH32i8)).addImm(Imm); MIB->setDesc(TII.get(X86::POP32r)); } + MIB->RemoveOperand(1); + MIB->addImplicitDefUseOperands(*MBB.getParent()); // Build CFI if necessary. MachineFunction &MF = *MBB.getParent(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index d8d7acae5c9f..81163e9fcfab 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -527,19 +527,19 @@ namespace { // Collect information about PHI nodes which can be transformed in // rewriteLoopExitValues. struct RewritePhi { - PHINode *PN; + PHINode *PN; // For which PHI node is this replacement? + unsigned Ith; // For which incoming value? + const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting. + Instruction *ExpansionPoint; // Where we'd like to expand that SCEV? + bool HighCost; // Is this expansion a high-cost? - // Ith incoming value. - unsigned Ith; + Value *Expansion = nullptr; + bool ValidRewrite = false; - // Exit value after expansion. - Value *Val; - - // High Cost when expansion. - bool HighCost; - - RewritePhi(PHINode *P, unsigned I, Value *V, bool H) - : PN(P), Ith(I), Val(V), HighCost(H) {} + RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt, + bool H) + : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt), + HighCost(H) {} }; } // end anonymous namespace @@ -671,41 +671,65 @@ bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { hasHardUserWithinLoop(L, Inst)) continue; + // Check if expansions of this SCEV would count as being high cost. bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst); - Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); - LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal - << '\n' - << " LoopVal = " << *Inst << "\n"); - - if (!isValidRewrite(Inst, ExitVal)) { - DeadInsts.push_back(ExitVal); - continue; - } - -#ifndef NDEBUG - // If we reuse an instruction from a loop which is neither L nor one of - // its containing loops, we end up breaking LCSSA form for this loop by - // creating a new use of its instruction. - if (auto *ExitInsn = dyn_cast(ExitVal)) - if (auto *EVL = LI->getLoopFor(ExitInsn->getParent())) - if (EVL != L) - assert(EVL->contains(L) && "LCSSA breach detected!"); -#endif + // Note that we must not perform expansions until after + // we query *all* the costs, because if we perform temporary expansion + // inbetween, one that we might not intend to keep, said expansion + // *may* affect cost calculation of the the next SCEV's we'll query, + // and next SCEV may errneously get smaller cost. // Collect all the candidate PHINodes to be rewritten. - RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost); + RewritePhiSet.emplace_back(PN, i, ExitValue, Inst, HighCost); } } } + // Now that we've done preliminary filtering and billed all the SCEV's, + // we can perform the last sanity check - the expansion must be valid. + for (RewritePhi &Phi : RewritePhiSet) { + Phi.Expansion = Rewriter.expandCodeFor(Phi.ExpansionSCEV, Phi.PN->getType(), + Phi.ExpansionPoint); + + LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " + << *(Phi.Expansion) << '\n' + << " LoopVal = " << *(Phi.ExpansionPoint) << "\n"); + + // FIXME: isValidRewrite() is a hack. it should be an assert, eventually. + Phi.ValidRewrite = isValidRewrite(Phi.ExpansionPoint, Phi.Expansion); + if (!Phi.ValidRewrite) { + DeadInsts.push_back(Phi.Expansion); + continue; + } + +#ifndef NDEBUG + // If we reuse an instruction from a loop which is neither L nor one of + // its containing loops, we end up breaking LCSSA form for this loop by + // creating a new use of its instruction. + if (auto *ExitInsn = dyn_cast(Phi.Expansion)) + if (auto *EVL = LI->getLoopFor(ExitInsn->getParent())) + if (EVL != L) + assert(EVL->contains(L) && "LCSSA breach detected!"); +#endif + } + + // TODO: after isValidRewrite() is an assertion, evaluate whether + // it is beneficial to change how we calculate high-cost: + // if we have SCEV 'A' which we know we will expand, should we calculate + // the cost of other SCEV's after expanding SCEV 'A', + // thus potentially giving cost bonus to those other SCEV's? + bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet); bool Changed = false; // Transformation. for (const RewritePhi &Phi : RewritePhiSet) { + if (!Phi.ValidRewrite) + continue; + PHINode *PN = Phi.PN; - Value *ExitVal = Phi.Val; + Value *ExitVal = Phi.Expansion; // Only do the rewrite when the ExitValue can be expanded cheaply. // If LoopCanBeDel is true, rewrite exit value aggressively. @@ -844,6 +868,8 @@ bool IndVarSimplify::canLoopBeDeleted( // phase later. Skip it in the loop invariant check below. bool found = false; for (const RewritePhi &Phi : RewritePhiSet) { + if (!Phi.ValidRewrite) + continue; unsigned i = Phi.Ith; if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) { found = true; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp index da68d3713b40..d6b01a12b937 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -369,7 +369,8 @@ Value *Mapper::mapValue(const Value *V) { if (NewTy != IA->getFunctionType()) V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(), - IA->hasSideEffects(), IA->isAlignStack()); + IA->hasSideEffects(), IA->isAlignStack(), + IA->getDialect()); } return getVM()[V] = const_cast(V); diff --git a/lib/clang/include/VCSVersion.inc b/lib/clang/include/VCSVersion.inc index 33bae0eb8c51..804f75afa722 100644 --- a/lib/clang/include/VCSVersion.inc +++ b/lib/clang/include/VCSVersion.inc @@ -1,14 +1,14 @@ // $FreeBSD$ -#define LLVM_REVISION "llvmorg-10.0.1-rc1-0-gf79cd71e145" +#define LLVM_REVISION "llvmorg-10.0.0-97-g6f71678ecd2" #define LLVM_REPOSITORY "git@github.com:llvm/llvm-project.git" -#define CLANG_REVISION "llvmorg-10.0.1-rc1-0-gf79cd71e145" +#define CLANG_REVISION "llvmorg-10.0.0-97-g6f71678ecd2" #define CLANG_REPOSITORY "git@github.com:llvm/llvm-project.git" // - -#define LLD_REVISION "llvmorg-10.0.1-rc1-0-gf79cd71e145-1300007" +#define LLD_REVISION "llvmorg-10.0.0-97-g6f71678ecd2-1300007" #define LLD_REPOSITORY "FreeBSD" -#define LLDB_REVISION "llvmorg-10.0.1-rc1-0-gf79cd71e145" +#define LLDB_REVISION "llvmorg-10.0.0-97-g6f71678ecd2" #define LLDB_REPOSITORY "git@github.com:llvm/llvm-project.git" diff --git a/lib/clang/include/llvm/Support/VCSRevision.h b/lib/clang/include/llvm/Support/VCSRevision.h index 3f88e86658b4..57cc603c4502 100644 --- a/lib/clang/include/llvm/Support/VCSRevision.h +++ b/lib/clang/include/llvm/Support/VCSRevision.h @@ -1,3 +1,3 @@ /* $FreeBSD$ */ -#define LLVM_REVISION "llvmorg-10.0.1-rc1-0-gf79cd71e145" +#define LLVM_REVISION "llvmorg-10.0.0-97-g6f71678ecd2" #define LLVM_REPOSITORY "git@github.com:llvm/llvm-project.git"