From fa40418fea35c68de2a358bce3539cdc5cbcd21a Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Thu, 29 Jul 2021 22:31:35 +0200 Subject: [PATCH 1/3] Vendor import of llvm-project branch release/13.x llvmorg-13-init-16854-g6b2e4c5a58d7. --- clang/lib/Format/WhitespaceManager.cpp | 2 +- .../Utils/ScalarEvolutionExpander.h | 5 - .../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 2 +- .../Target/AArch64/AArch64ISelLowering.cpp | 6 + .../Transforms/Scalar/LoopStrengthReduce.cpp | 482 +++--------------- .../Utils/ScalarEvolutionExpander.cpp | 5 +- 6 files changed, 87 insertions(+), 415 deletions(-) diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp index ca2222d1feff..a822e0aaf1f9 100644 --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -347,7 +347,7 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End, if (ScopeStart > Start + 1 && Changes[ScopeStart - 2].Tok->is(tok::identifier) && Changes[ScopeStart - 1].Tok->is(tok::l_paren)) - return true; + return Style.BinPackArguments; // Ternary operator if (Changes[i].Tok->is(TT_ConditionalExpr)) diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h index 59bf3a342caa..8662dbf385dc 100644 --- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h +++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h @@ -83,9 +83,6 @@ class SCEVExpander : public SCEVVisitor { /// InsertedValues/InsertedPostIncValues. SmallPtrSet ReusedValues; - // The induction variables generated. - SmallVector InsertedIVs; - /// A memoization of the "relevant" loop for a given SCEV. DenseMap RelevantLoops; @@ -202,11 +199,9 @@ public: InsertedPostIncValues.clear(); ReusedValues.clear(); ChainedPhis.clear(); - InsertedIVs.clear(); } ScalarEvolution *getSE() { return &SE; } - const SmallVectorImpl &getInsertedIVs() const { return InsertedIVs; } /// Return a vector containing all instructions inserted during expansion. SmallVector getAllInsertedInstructions() const { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 1415cce3b1df..09627ee6a164 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1660,7 +1660,7 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) { // physical registers if there is debug info associated with the terminator // of our mbb. We want to include said debug info in our terminator // sequence, so we return true in that case. 
-    return MI.isDebugValue();
+    return MI.isDebugInstr();
 
   // We have left the terminator sequence if we are not doing one of the
   // following:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e7282aad05e2..ae702eedcd66 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4348,6 +4348,9 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
     IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
     MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
     InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
+    Mask = DAG.getNode(
+        ISD::ZERO_EXTEND, DL,
+        VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
   }
 
   if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
@@ -4452,6 +4455,9 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
         ISD::ANY_EXTEND, DL,
         VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal);
     StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal);
+    Mask = DAG.getNode(
+        ISD::ZERO_EXTEND, DL,
+        VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
   } else if (VT.isFloatingPoint()) {
     // Handle FP data by casting the data so an integer scatter can be used.
     EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 5f210380ae5a..b585818af595 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1981,9 +1981,6 @@ class LSRInstance {
   /// IV users that belong to profitable IVChains.
   SmallPtrSet<Use*, MaxChains> IVIncSet;
 
-  /// Induction variables that were generated and inserted by the SCEV Expander.
-  SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;
-
   void OptimizeShadowIV();
   bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
   ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
@@ -2088,9 +2085,6 @@ public:
               TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);
 
   bool getChanged() const { return Changed; }
-  const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs() const {
-    return ScalarEvolutionIVs;
-  }
 
   void print_factors_and_types(raw_ostream &OS) const;
   void print_fixups(raw_ostream &OS) const;
@@ -5595,11 +5589,6 @@ void LSRInstance::ImplementSolution(
       GenerateIVChain(Chain, Rewriter, DeadInsts);
     Changed = true;
   }
-
-  for (const WeakVH &IV : Rewriter.getInsertedIVs())
-    if (IV && dyn_cast<Instruction>(&*IV)->getParent())
-      ScalarEvolutionIVs.push_back(IV);
-
   // Clean up after ourselves. This must be done before deleting any
   // instructions.
   Rewriter.clear();
@@ -5870,389 +5859,87 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<MemorySSAWrapperPass>();
 }
 
-struct SCEVDbgValueBuilder {
-  SCEVDbgValueBuilder() = default;
-  SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) {
-    Values = Base.Values;
-    Expr = Base.Expr;
-  }
+using EqualValues = SmallVector<std::tuple<WeakVH, int64_t>, 4>;
+using EqualValuesMap =
+    DenseMap<DbgValueInst *, SmallVector<std::pair<unsigned, EqualValues>>>;
+using LocationMap =
+    DenseMap<DbgValueInst *, std::pair<DIExpression *, Metadata *>>;
 
-  /// The DIExpression as we translate the SCEV.
-  SmallVector<uint64_t, 6> Expr;
-  /// The location ops of the DIExpression.
-  SmallVector<llvm::ValueAsMetadata *, 2> Values;
-
-  void pushOperator(uint64_t Op) { Expr.push_back(Op); }
-  void pushUInt(uint64_t Operand) { Expr.push_back(Operand); }
-
-  /// Add a DW_OP_LLVM_arg to the expression, followed by the index of the value
-  /// in the set of values referenced by the expression.
-  void pushValue(llvm::Value *V) {
-    Expr.push_back(llvm::dwarf::DW_OP_LLVM_arg);
-    auto *It =
-        std::find(Values.begin(), Values.end(), llvm::ValueAsMetadata::get(V));
-    unsigned ArgIndex = 0;
-    if (It != Values.end()) {
-      ArgIndex = std::distance(Values.begin(), It);
-    } else {
-      ArgIndex = Values.size();
-      Values.push_back(llvm::ValueAsMetadata::get(V));
-    }
-    Expr.push_back(ArgIndex);
-  }
-
-  void pushValue(const SCEVUnknown *U) {
-    llvm::Value *V = cast<SCEVUnknown>(U)->getValue();
-    pushValue(V);
-  }
-
-  void pushConst(const SCEVConstant *C) {
-    Expr.push_back(llvm::dwarf::DW_OP_consts);
-    Expr.push_back(C->getAPInt().getSExtValue());
-  }
-
-  /// Several SCEV types are sequences of the same arithmetic operator applied
-  /// to constants and values that may be extended or truncated.
-  bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
-                          uint64_t DwarfOp) {
-    assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
-           "Expected arithmetic SCEV type");
-    bool Success = true;
-    unsigned EmitOperator = 0;
-    for (auto &Op : CommExpr->operands()) {
-      Success &= pushSCEV(Op);
-
-      if (EmitOperator >= 1)
-        pushOperator(DwarfOp);
-      ++EmitOperator;
-    }
-    return Success;
-  }
-
-  // TODO: Identify and omit noop casts.
-  bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) {
-    const llvm::SCEV *Inner = C->getOperand(0);
-    const llvm::Type *Type = C->getType();
-    uint64_t ToWidth = Type->getIntegerBitWidth();
-    bool Success = pushSCEV(Inner);
-    uint64_t CastOps[] = {dwarf::DW_OP_LLVM_convert, ToWidth,
-                          IsSigned ? llvm::dwarf::DW_ATE_signed
-                                   : llvm::dwarf::DW_ATE_unsigned};
-    for (const auto &Op : CastOps)
-      pushOperator(Op);
-    return Success;
-  }
-
-  // TODO: MinMax - although these haven't been encountered in the test suite.
-  bool pushSCEV(const llvm::SCEV *S) {
-    bool Success = true;
-    if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
-      pushConst(StartInt);
-
-    } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
-      if(!U->getValue())
-        return false;
-      pushValue(U->getValue());
-
-    } else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
-      Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
-
-    } else if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
-      Success &= pushSCEV(UDiv->getLHS());
-      Success &= pushSCEV(UDiv->getRHS());
-      pushOperator(llvm::dwarf::DW_OP_div);
-
-    } else if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
-      // Assert if a new and unknown SCEVCastEXpr type is encountered.
-      assert((isa<SCEVTruncateExpr>(Cast) || isa<SCEVZeroExtendExpr>(Cast) ||
-              isa<SCEVSignExtendExpr>(Cast) || isa<SCEVPtrToIntExpr>(Cast)) &&
-             "Unexpected cast type in SCEV.");
-      Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));
-
-    } else if (const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {
-      Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
-
-    } else if (isa<SCEVAddRecExpr>(S)) {
-      // Nested SCEVAddRecExpr are generated by nested loops and are currently
-      // unsupported.
-      return false;
-
-    } else {
-      return false;
-    }
-    return Success;
-  }
-
-  void setFinalExpression(llvm::DbgValueInst &DI, const DIExpression *OldExpr) {
-    // Re-state assumption that this dbg.value is not variadic. Any remaining
-    // opcodes in its expression operate on a single value already on the
-    // expression stack. Prepend our operations, which will re-compute and
-    // place that value on the expression stack.
-    assert(!DI.hasArgList());
-    auto *NewExpr =
-        DIExpression::prependOpcodes(OldExpr, Expr, /*StackValue*/ true);
-    DI.setExpression(NewExpr);
-
-    auto ValArrayRef = llvm::ArrayRef(Values);
-    DI.setRawLocation(llvm::DIArgList::get(DI.getContext(), ValArrayRef));
-  }
-
-  /// If a DVI can be emitted without a DIArgList, omit DW_OP_llvm_arg and the
-  /// location op index 0.
-  void setShortFinalExpression(llvm::DbgValueInst &DI,
-                               const DIExpression *OldExpr) {
-    assert((Expr[0] == llvm::dwarf::DW_OP_LLVM_arg && Expr[1] == 0) &&
-           "Expected DW_OP_llvm_arg and 0.");
-    DI.replaceVariableLocationOp(
-        0u, llvm::MetadataAsValue::get(DI.getContext(), Values[0]));
-
-    // See setFinalExpression: prepend our opcodes on the start of any old
-    // expression opcodes.
-    assert(!DI.hasArgList());
-    llvm::SmallVector<uint64_t, 6> FinalExpr(Expr.begin() + 2, Expr.end());
-    auto *NewExpr =
-        DIExpression::prependOpcodes(OldExpr, FinalExpr, /*StackValue*/ true);
-    DI.setExpression(NewExpr);
-  }
-
-  /// Once the IV and variable SCEV translation is complete, write it to the
-  /// source DVI.
-  void applyExprToDbgValue(llvm::DbgValueInst &DI,
-                           const DIExpression *OldExpr) {
-    assert(!Expr.empty() && "Unexpected empty expression.");
-    // Emit a simpler form if only a single location is referenced.
-    if (Values.size() == 1 && Expr[0] == llvm::dwarf::DW_OP_LLVM_arg &&
-        Expr[1] == 0) {
-      setShortFinalExpression(DI, OldExpr);
-    } else {
-      setFinalExpression(DI, OldExpr);
-    }
-  }
-
-  /// Return true if the combination of arithmetic operator and underlying
-  /// SCEV constant value is an identity function.
-  bool isIdentityFunction(uint64_t Op, const SCEV *S) {
-    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
-      int64_t I = C->getAPInt().getSExtValue();
-      switch (Op) {
-      case llvm::dwarf::DW_OP_plus:
-      case llvm::dwarf::DW_OP_minus:
-        return I == 0;
-      case llvm::dwarf::DW_OP_mul:
-      case llvm::dwarf::DW_OP_div:
-        return I == 1;
-      }
-    }
-    return false;
-  }
-
-  /// Convert a SCEV of a value to a DIExpression that is pushed onto the
-  /// builder's expression stack. The stack should already contain an
-  /// expression for the iteration count, so that it can be multiplied by
-  /// the stride and added to the start.
-  /// Components of the expression are omitted if they are an identity function.
-  /// Chain (non-affine) SCEVs are not supported.
-  bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
-    assert(SAR.isAffine() && "Expected affine SCEV");
-    // TODO: Is this check needed?
-    if (isa<SCEVAddRecExpr>(SAR.getStart()))
-      return false;
-
-    const SCEV *Start = SAR.getStart();
-    const SCEV *Stride = SAR.getStepRecurrence(SE);
-
-    // Skip pushing arithmetic noops.
-    if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
-      if (!pushSCEV(Stride))
-        return false;
-      pushOperator(llvm::dwarf::DW_OP_mul);
-    }
-    if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
-      if (!pushSCEV(Start))
-        return false;
-      pushOperator(llvm::dwarf::DW_OP_plus);
-    }
-    return true;
-  }
-
-  /// Convert a SCEV of a value to a DIExpression that is pushed onto the
-  /// builder's expression stack. The stack should already contain an
-  /// expression for the iteration count, so that it can be multiplied by
-  /// the stride and added to the start.
-  /// Components of the expression are omitted if they are an identity function.
-  bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,
-                           ScalarEvolution &SE) {
-    assert(SAR.isAffine() && "Expected affine SCEV");
-    if (isa<SCEVAddRecExpr>(SAR.getStart())) {
-      LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV. Unsupported nested AddRec: "
-                        << SAR << '\n');
-      return false;
-    }
-    const SCEV *Start = SAR.getStart();
-    const SCEV *Stride = SAR.getStepRecurrence(SE);
-
-    // Skip pushing arithmetic noops.
-    if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
-      if (!pushSCEV(Start))
-        return false;
-      pushOperator(llvm::dwarf::DW_OP_minus);
-    }
-    if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
-      if (!pushSCEV(Stride))
-        return false;
-      pushOperator(llvm::dwarf::DW_OP_div);
-    }
-    return true;
-  }
-};
-
-struct DVIRecoveryRec {
-  DbgValueInst *DVI;
-  DIExpression *Expr;
-  Metadata *LocationOp;
-  const llvm::SCEV *SCEV;
-};
-
-static bool RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI,
-                                     const SCEVDbgValueBuilder &IterationCount,
-                                     ScalarEvolution &SE) {
-  // LSR may add locations to previously single location-op DVIs which
-  // are currently not supported.
-  if (CachedDVI.DVI->getNumVariableLocationOps() != 1)
-    return false;
-
-  // SCEVs for SSA values are most frquently of the form
-  // {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
-  // This is because %a is a PHI node that is not the IV. However, these
-  // SCEVs have not been observed to result in debuginfo-lossy optimisations,
-  // so its not expected this point will be reached.
-  if (!isa<SCEVAddRecExpr>(CachedDVI.SCEV))
-    return false;
-
-  LLVM_DEBUG(dbgs() << "scev-salvage: Value to salvage SCEV: "
-                    << *CachedDVI.SCEV << '\n');
-
-  const auto *Rec = cast<SCEVAddRecExpr>(CachedDVI.SCEV);
-  if (!Rec->isAffine())
-    return false;
-
-  // Initialise a new builder with the iteration count expression. In
-  // combination with the value's SCEV this enables recovery.
-  SCEVDbgValueBuilder RecoverValue(IterationCount);
-  if (!RecoverValue.SCEVToValueExpr(*Rec, SE))
-    return false;
-
-  LLVM_DEBUG(dbgs() << "scev-salvage: Updating: " << *CachedDVI.DVI << '\n');
-  RecoverValue.applyExprToDbgValue(*CachedDVI.DVI, CachedDVI.Expr);
-  LLVM_DEBUG(dbgs() << "scev-salvage: to: " << *CachedDVI.DVI << '\n');
-  return true;
-}
-
-static bool
-DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE,
-                          llvm::PHINode *LSRInductionVar,
-                          SmallVector<DVIRecoveryRec, 2> &DVIToUpdate) {
-  if (DVIToUpdate.empty())
-    return false;
-
-  const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar);
-  assert(SCEVInductionVar &&
-         "Anticipated a SCEV for the post-LSR induction variable");
-
-  bool Changed = false;
-  if (const SCEVAddRecExpr *IVAddRec =
-          dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
-    SCEVDbgValueBuilder IterCountExpr;
-    IterCountExpr.pushValue(LSRInductionVar);
-    if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
-      return false;
-
-    LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
-                      << '\n');
-
-    // Needn't salvage if the location op hasn't been undef'd by LSR.
-    for (auto &DVIRec : DVIToUpdate) {
-      if (!DVIRec.DVI->isUndef())
-        continue;
-
-      // Some DVIs that were single location-op when cached are now multi-op,
-      // due to LSR optimisations. However, multi-op salvaging is not yet
-      // supported by SCEV salvaging. But, we can attempt a salvage by restoring
-      // the pre-LSR single-op expression.
-      if (DVIRec.DVI->hasArgList()) {
-        llvm::Type *Ty = DVIRec.DVI->getVariableLocationOp(0)->getType();
-        DVIRec.DVI->setRawLocation(
-            llvm::ValueAsMetadata::get(UndefValue::get(Ty)));
-        DVIRec.DVI->setExpression(DVIRec.Expr);
-      }
-
-      Changed |= RewriteDVIUsingIterCount(DVIRec, IterCountExpr, SE);
-    }
-  }
-  return Changed;
-}
-
-/// Identify and cache salvageable DVI locations and expressions along with the
-/// corresponding SCEV(s). Also ensure that the DVI is not deleted before
-static void
-DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE,
-                       SmallVector<DVIRecoveryRec, 2> &SalvageableDVISCEVs,
-                       SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) {
+static void DbgGatherEqualValues(Loop *L, ScalarEvolution &SE,
+                                 EqualValuesMap &DbgValueToEqualSet,
+                                 LocationMap &DbgValueToLocation) {
   for (auto &B : L->getBlocks()) {
     for (auto &I : *B) {
       auto DVI = dyn_cast<DbgValueInst>(&I);
       if (!DVI)
         continue;
-
-      if (DVI->hasArgList())
-        continue;
-
-      if (!SE.isSCEVable(DVI->getVariableLocationOp(0)->getType()))
-        continue;
-
-      SalvageableDVISCEVs.push_back(
-          {DVI, DVI->getExpression(), DVI->getRawLocation(),
-           SE.getSCEV(DVI->getVariableLocationOp(0))});
-      DVIHandles.insert(DVI);
+      for (unsigned Idx = 0; Idx < DVI->getNumVariableLocationOps(); ++Idx) {
+        // TODO: We can duplicate results if the same arg appears more than
+        // once.
+        Value *V = DVI->getVariableLocationOp(Idx);
+        if (!V || !SE.isSCEVable(V->getType()))
+          continue;
+        auto DbgValueSCEV = SE.getSCEV(V);
+        EqualValues EqSet;
+        for (PHINode &Phi : L->getHeader()->phis()) {
+          if (V->getType() != Phi.getType())
+            continue;
+          if (!SE.isSCEVable(Phi.getType()))
+            continue;
+          auto PhiSCEV = SE.getSCEV(&Phi);
+          Optional<APInt> Offset =
+              SE.computeConstantDifference(DbgValueSCEV, PhiSCEV);
+          if (Offset && Offset->getMinSignedBits() <= 64)
+            EqSet.emplace_back(
+                std::make_tuple(&Phi, Offset.getValue().getSExtValue()));
+        }
+        DbgValueToEqualSet[DVI].push_back({Idx, std::move(EqSet)});
+        // If we fall back to using this raw location, at least one location op
+        // must be dead. A DIArgList will automatically undef arguments when
+        // they become unavailable, but a ValueAsMetadata will not; since we
+        // know the value should be undef, we use the undef value directly here.
+        Metadata *RawLocation =
+            DVI->hasArgList() ? DVI->getRawLocation()
+                              : ValueAsMetadata::get(UndefValue::get(
+                                    DVI->getVariableLocationOp(0)->getType()));
+        DbgValueToLocation[DVI] = {DVI->getExpression(), RawLocation};
+      }
     }
   }
 }
 
-/// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback
-/// any PHi from the loop header is usable, but may have less chance of
-/// surviving subsequent transforms.
-static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
-                                           const LSRInstance &LSR) {
-  // For now, just pick the first IV generated and inserted. Ideally pick an IV
-  // that is unlikely to be optimised away by subsequent transforms.
-  for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
-    if (!IV)
+static void DbgApplyEqualValues(EqualValuesMap &DbgValueToEqualSet,
+                                LocationMap &DbgValueToLocation) {
+  for (auto A : DbgValueToEqualSet) {
+    auto *DVI = A.first;
+    // Only update those that are now undef.
+    if (!DVI->isUndef())
       continue;
-
-    assert(isa<PHINode>(&*IV) && "Expected PhI node.");
-    if (SE.isSCEVable((*IV).getType())) {
-      PHINode *Phi = dyn_cast<PHINode>(&*IV);
-      LLVM_DEBUG(const llvm::SCEV *S = SE.getSCEV(Phi);
-                 dbgs() << "scev-salvage: IV : " << *IV << "with SCEV: " << *S
-                        << "\n");
-      return Phi;
+    // The dbg.value may have had its value or expression changed during LSR by
+    // a failed salvage attempt; refresh them from the map.
+    auto *DbgDIExpr = DbgValueToLocation[DVI].first;
+    DVI->setRawLocation(DbgValueToLocation[DVI].second);
+    DVI->setExpression(DbgDIExpr);
+    assert(DVI->isUndef() && "dbg.value with non-undef location should not "
+                             "have been modified by LSR.");
+    for (auto IdxEV : A.second) {
+      unsigned Idx = IdxEV.first;
+      for (auto EV : IdxEV.second) {
+        auto EVHandle = std::get<WeakVH>(EV);
+        if (!EVHandle)
+          continue;
+        int64_t Offset = std::get<int64_t>(EV);
+        DVI->replaceVariableLocationOp(Idx, EVHandle);
+        if (Offset) {
+          SmallVector<uint64_t, 8> Ops;
+          DIExpression::appendOffset(Ops, Offset);
+          DbgDIExpr = DIExpression::appendOpsToArg(DbgDIExpr, Ops, Idx, true);
+        }
+        DVI->setExpression(DbgDIExpr);
+        break;
+      }
     }
   }
-
-  for (PHINode &Phi : L.getHeader()->phis()) {
-    if (!SE.isSCEVable(Phi.getType()))
-      continue;
-
-    const llvm::SCEV *PhiSCEV = SE.getSCEV(&Phi);
-    if (const llvm::SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(PhiSCEV))
-      if (!Rec->isAffine())
-        continue;
-
-    LLVM_DEBUG(dbgs() << "scev-salvage: Selected IV from loop header: " << Phi
-                      << " with SCEV: " << *PhiSCEV << "\n");
-    return &Phi;
-  }
-  return nullptr;
 }
 
 static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
@@ -6261,21 +5948,20 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo &TLI,
                                MemorySSA *MSSA) {
 
-  // Debug preservation - before we start removing anything identify which DVI
-  // meet the salvageable criteria and store their DIExpression and SCEVs.
-  SmallVector<DVIRecoveryRec, 2> SalvageableDVI;
-  SmallSet<AssertingVH<DbgValueInst>, 2> DVIHandles;
-  DbgGatherSalvagableDVI(L, SE, SalvageableDVI, DVIHandles);
-
   bool Changed = false;
   std::unique_ptr<MemorySSAUpdater> MSSAU;
   if (MSSA)
     MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
 
   // Run the main LSR transformation.
-  const LSRInstance &Reducer =
-      LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
-  Changed |= Reducer.getChanged();
+  Changed |=
+      LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get()).getChanged();
+
+  // Debug preservation - before we start removing anything create equivalence
+  // sets for the llvm.dbg.value intrinsics.
+  EqualValuesMap DbgValueToEqualSet;
+  LocationMap DbgValueToLocation;
+  DbgGatherEqualValues(L, SE, DbgValueToEqualSet, DbgValueToLocation);
 
   // Remove any extra phis created by processing inner loops.
   Changed |= DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
@@ -6295,22 +5981,8 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
     }
   }
 
-  if (SalvageableDVI.empty())
-    return Changed;
+  DbgApplyEqualValues(DbgValueToEqualSet, DbgValueToLocation);
 
-  // Obtain relevant IVs and attempt to rewrite the salvageable DVIs with
-  // expressions composed using the derived iteration count.
-  // TODO: Allow for multiple IV references for nested AddRecSCEVs
-  for (auto &L : LI) {
-    if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer))
-      DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVI);
-    else {
-      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
-                           "could not be identified.\n");
-    }
-  }
-
-  DVIHandles.clear();
   return Changed;
 }
 
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 3978e1e29825..5af1c37e6197 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1393,10 +1393,9 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
   // can ensure that IVIncrement dominates the current uses.
PostIncLoops = SavedPostIncLoops; - // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most - // effective when we are able to use an IV inserted here, so record it. + // Remember this PHI, even in post-inc mode. InsertedValues.insert(PN); - InsertedIVs.push_back(PN); + return PN; } From 9cb5bdb8b26e2207293f0fb56701c4a0ff64a47d Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sat, 21 Aug 2021 23:25:07 +0200 Subject: [PATCH 2/3] Vendor import of llvm-project branch release/13.x llvmorg-13.0.0-rc1-0-gd6974c010878. --- .../clang/Basic/DiagnosticDriverKinds.td | 2 - .../include/clang/Basic/DiagnosticLexKinds.td | 7 + .../clang/Basic/DiagnosticSemaKinds.td | 2 - clang/include/clang/Basic/LangOptions.def | 2 +- clang/include/clang/Driver/Options.td | 3 - clang/include/clang/Driver/Types.h | 8 - .../Frontend/PreprocessorOutputOptions.h | 2 - clang/include/clang/Lex/HeaderSearch.h | 17 +- clang/include/clang/Lex/Preprocessor.h | 5 +- clang/include/clang/Lex/PreprocessorLexer.h | 20 +- clang/lib/Basic/OpenCLOptions.cpp | 7 +- clang/lib/Basic/TargetInfo.cpp | 10 +- clang/lib/Basic/Targets/AArch64.cpp | 6 +- clang/lib/Basic/Targets/AMDGPU.h | 5 +- clang/lib/CodeGen/CGStmt.cpp | 43 ++- clang/lib/CodeGen/TargetInfo.cpp | 14 + clang/lib/CodeGen/TargetInfo.h | 7 + clang/lib/Driver/ToolChains/Clang.cpp | 15 +- clang/lib/Driver/Types.cpp | 39 -- clang/lib/Frontend/CompilerInvocation.cpp | 2 +- .../lib/Frontend/PrintPreprocessedOutput.cpp | 341 +++++++----------- clang/lib/Lex/Lexer.cpp | 4 +- clang/lib/Lex/PPDirectives.cpp | 4 + clang/lib/Lex/PPLexerChange.cpp | 45 ++- clang/lib/Lex/Pragma.cpp | 92 ++++- clang/lib/Lex/Preprocessor.cpp | 6 - clang/lib/Parse/ParseDecl.cpp | 16 +- clang/lib/Sema/Sema.cpp | 3 +- clang/lib/Sema/SemaType.cpp | 49 ++- libcxx/include/format | 12 +- libcxx/include/ranges | 12 +- lld/ELF/Config.h | 7 +- lld/ELF/Driver.cpp | 16 +- lld/ELF/Options.td | 3 + lld/ELF/Symbols.cpp | 8 +- lld/ELF/SyntheticSections.cpp | 2 +- lld/docs/ReleaseNotes.rst | 2 + lld/docs/ld.lld.1 | 3 + llvm/include/llvm/CodeGen/TargetLowering.h | 5 + llvm/include/llvm/CodeGen/ValueTypes.td | 1 + llvm/include/llvm/Support/MachineValueType.h | 4 +- llvm/lib/Analysis/TargetLibraryInfo.cpp | 7 +- .../CodeGen/GlobalISel/InlineAsmLowering.cpp | 9 +- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 6 +- .../CodeGen/SelectionDAG/TargetLowering.cpp | 3 +- llvm/lib/CodeGen/ValueTypes.cpp | 2 + llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 7 + .../Target/AArch64/AArch64ISelLowering.cpp | 55 ++- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 7 + llvm/lib/Target/AArch64/AArch64InstrInfo.td | 14 + .../lib/Target/AArch64/AArch64RegisterInfo.td | 4 +- .../Target/AArch64/Utils/AArch64BaseInfo.h | 19 + llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 7 + llvm/lib/Transforms/Scalar/DivRemPairs.cpp | 7 +- llvm/utils/TableGen/CodeGenTarget.cpp | 1 + 56 files changed, 612 insertions(+), 391 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 3b4daa59f66b..fc3704303a95 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -129,8 +129,6 @@ def err_drv_invalid_Xopenmp_target_with_args : Error< "invalid -Xopenmp-target argument: '%0', options requiring arguments are unsupported">; def err_drv_argument_only_allowed_with : Error< "invalid argument '%0' only allowed with '%1'">; -def err_drv_minws_unsupported_input_type 
: Error< - "'-fminimize-whitespace' invalid for input of type %0">; def err_drv_amdgpu_ieee_without_no_honor_nans : Error< "invalid argument '-mno-amdgpu-ieee' only allowed with relaxed NaN handling">; def err_drv_argument_not_allowed_with : Error< diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index ce6d0d0394b4..bdf5d263fa92 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -300,6 +300,13 @@ def pp_pragma_once_in_main_file : Warning<"#pragma once in main file">, def pp_pragma_sysheader_in_main_file : Warning< "#pragma system_header ignored in main file">, InGroup>; + +def err_pragma_include_instead_not_sysheader : Error< + "'#pragma clang include_instead' cannot be used outside of system headers">; +def err_pragma_include_instead_system_reserved : Error< + "header '%0' is an implementation detail; #include %select{'%2'|either '%2' " + "or '%3'|one of %2}1 instead">; + def pp_poisoning_existing_macro : Warning<"poisoning existing macro">; def pp_out_of_date_dependency : Warning< "current file is older than dependency %0">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 108f1796415c..c57b8eca7deb 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10100,8 +10100,6 @@ def err_opencl_requires_extension : Error< def ext_opencl_double_without_pragma : Extension< "Clang permits use of type 'double' regardless pragma if 'cl_khr_fp64' is" " supported">; -def err_opencl_double_requires_extension : Error< - "use of type 'double' requires %select{cl_khr_fp64|cl_khr_fp64 and __opencl_c_fp64}0 support">; def warn_opencl_generic_address_space_arg : Warning< "passing non-generic address space pointer to %0" " may cause dynamic conversion affecting performance">, diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 08b8d8851afa..74deba6ef7fb 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -224,7 +224,7 @@ LANGOPT(OpenCLVersion , 32, 0, "OpenCL C version") LANGOPT(OpenCLCPlusPlus , 1, 0, "C++ for OpenCL") LANGOPT(OpenCLCPlusPlusVersion , 32, 0, "C++ for OpenCL version") LANGOPT(OpenCLGenericAddressSpace, 1, 0, "OpenCL generic keyword") -LANGOPT(OpenCLPipe , 1, 0, "OpenCL pipe keyword") +LANGOPT(OpenCLPipes , 1, 0, "OpenCL pipes language constructs and built-ins") LANGOPT(NativeHalfType , 1, 0, "Native half type support") LANGOPT(NativeHalfArgsAndReturns, 1, 0, "Native half args and returns") LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 5a9fd078390e..ab1a5487d9c0 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1799,9 +1799,6 @@ def frewrite_map_file_EQ : Joined<["-"], "frewrite-map-file=">, defm use_line_directives : BoolFOption<"use-line-directives", PreprocessorOutputOpts<"UseLineDirectives">, DefaultFalse, PosFlag, NegFlag>; -defm minimize_whitespace : BoolFOption<"minimize-whitespace", - PreprocessorOutputOpts<"MinimizeWhitespace">, DefaultFalse, - PosFlag, NegFlag>; def ffreestanding : Flag<["-"], "ffreestanding">, Group, Flags<[CC1Option]>, HelpText<"Assert that the compilation takes place in a freestanding environment">, diff --git 
a/clang/include/clang/Driver/Types.h b/clang/include/clang/Driver/Types.h index c9d63551090c..6a1f57416ae5 100644 --- a/clang/include/clang/Driver/Types.h +++ b/clang/include/clang/Driver/Types.h @@ -66,14 +66,6 @@ namespace types { /// isAcceptedByClang - Can clang handle this input type. bool isAcceptedByClang(ID Id); - /// isDerivedFromC - Is the input derived from C. - /// - /// That is, does the lexer follow the rules of - /// TokenConcatenation::AvoidConcat. If this is the case, the preprocessor may - /// add and remove whitespace between tokens. Used to determine whether the - /// input can be processed by -fminimize-whitespace. - bool isDerivedFromC(ID Id); - /// isCXX - Is this a "C++" input (C++ and Obj-C++ sources and headers). bool isCXX(ID Id); diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/clang/include/clang/Frontend/PreprocessorOutputOptions.h index 257538ee0606..72e5ad1137fb 100644 --- a/clang/include/clang/Frontend/PreprocessorOutputOptions.h +++ b/clang/include/clang/Frontend/PreprocessorOutputOptions.h @@ -24,7 +24,6 @@ public: unsigned ShowIncludeDirectives : 1; ///< Print includes, imports etc. within preprocessed output. unsigned RewriteIncludes : 1; ///< Preprocess include directives only. unsigned RewriteImports : 1; ///< Include contents of transitively-imported modules. - unsigned MinimizeWhitespace : 1; ///< Ignore whitespace from input. public: PreprocessorOutputOptions() { @@ -37,7 +36,6 @@ public: ShowIncludeDirectives = 0; RewriteIncludes = 0; RewriteImports = 0; - MinimizeWhitespace = 0; } }; diff --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h index 93d6ea72270a..a35a394f719b 100644 --- a/clang/include/clang/Lex/HeaderSearch.h +++ b/clang/include/clang/Lex/HeaderSearch.h @@ -20,9 +20,12 @@ #include "clang/Lex/ModuleMap.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Support/Allocator.h" #include #include @@ -110,6 +113,14 @@ struct HeaderFileInfo { /// of the framework. StringRef Framework; + /// List of aliases that this header is known as. + /// Most headers should only have at most one alias, but a handful + /// have two. + llvm::SetVector, + llvm::SmallVector, 2>, + llvm::SmallSet, 2>> + Aliases; + HeaderFileInfo() : isImport(false), isPragmaOnce(false), DirInfo(SrcMgr::C_User), External(false), isModuleHeader(false), isCompilingModuleHeader(false), @@ -453,6 +464,10 @@ public: getFileInfo(File).DirInfo = SrcMgr::C_System; } + void AddFileAlias(const FileEntry *File, StringRef Alias) { + getFileInfo(File).Aliases.insert(Alias); + } + /// Mark the specified file as part of a module. void MarkFileModuleHeader(const FileEntry *FE, ModuleMap::ModuleHeaderRole Role, diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 7ab13640ce2c..fe2327f0a480 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -1953,7 +1953,8 @@ public: /// This either returns the EOF token and returns true, or /// pops a level off the include stack and returns false, at which point the /// client should call lex again. 
- bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false); + bool HandleEndOfFile(Token &Result, SourceLocation Loc, + bool isEndOfMacro = false); /// Callback invoked when the current TokenLexer hits the end of its /// token stream. @@ -2363,12 +2364,14 @@ private: // Pragmas. void HandlePragmaDirective(PragmaIntroducer Introducer); + void ResolvePragmaIncludeInstead(SourceLocation Location) const; public: void HandlePragmaOnce(Token &OnceTok); void HandlePragmaMark(Token &MarkTok); void HandlePragmaPoison(); void HandlePragmaSystemHeader(Token &SysHeaderTok); + void HandlePragmaIncludeInstead(Token &Tok); void HandlePragmaDependency(Token &DependencyTok); void HandlePragmaPushMacro(Token &Tok); void HandlePragmaPopMacro(Token &Tok); diff --git a/clang/include/clang/Lex/PreprocessorLexer.h b/clang/include/clang/Lex/PreprocessorLexer.h index 03b1cc2c10e2..b43197a6031c 100644 --- a/clang/include/clang/Lex/PreprocessorLexer.h +++ b/clang/include/clang/Lex/PreprocessorLexer.h @@ -14,11 +14,13 @@ #ifndef LLVM_CLANG_LEX_PREPROCESSORLEXER_H #define LLVM_CLANG_LEX_PREPROCESSORLEXER_H +#include "clang/Basic/SourceLocation.h" +#include "clang/Lex/HeaderSearch.h" #include "clang/Lex/MultipleIncludeOpt.h" #include "clang/Lex/Token.h" -#include "clang/Basic/SourceLocation.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" #include namespace clang { @@ -74,6 +76,13 @@ protected: /// we are currently in. SmallVector ConditionalStack; + struct IncludeInfo { + const FileEntry *File; + SourceLocation Location; + }; + // A complete history of all the files included by the current file. + llvm::StringMap IncludeHistory; + PreprocessorLexer() : FID() {} PreprocessorLexer(Preprocessor *pp, FileID fid); virtual ~PreprocessorLexer() = default; @@ -175,6 +184,15 @@ public: ConditionalStack.clear(); ConditionalStack.append(CL.begin(), CL.end()); } + + void addInclude(StringRef Filename, const FileEntry &File, + SourceLocation Location) { + IncludeHistory.insert({Filename, {&File, Location}}); + } + + const llvm::StringMap &getIncludeHistory() const { + return IncludeHistory; + } }; } // namespace clang diff --git a/clang/lib/Basic/OpenCLOptions.cpp b/clang/lib/Basic/OpenCLOptions.cpp index 2e215b185f66..b7408f39bdab 100644 --- a/clang/lib/Basic/OpenCLOptions.cpp +++ b/clang/lib/Basic/OpenCLOptions.cpp @@ -111,7 +111,9 @@ bool OpenCLOptions::diagnoseUnsupportedFeatureDependencies( // Feature pairs. First feature in a pair requires the second one to be // supported. static const llvm::StringMap DependentFeaturesMap = { - {"__opencl_c_read_write_images", "__opencl_c_images"}}; + {"__opencl_c_read_write_images", "__opencl_c_images"}, + {"__opencl_c_3d_image_writes", "__opencl_c_images"}, + {"__opencl_c_pipes", "__opencl_c_generic_address_space"}}; auto OpenCLFeaturesMap = TI.getSupportedOpenCLOpts(); @@ -130,7 +132,8 @@ bool OpenCLOptions::diagnoseFeatureExtensionDifferences( const TargetInfo &TI, DiagnosticsEngine &Diags) { // Extensions and equivalent feature pairs. 
static const llvm::StringMap FeatureExtensionMap = { - {"cl_khr_fp64", "__opencl_c_fp64"}}; + {"cl_khr_fp64", "__opencl_c_fp64"}, + {"cl_khr_3d_image_writes", "__opencl_c_3d_image_writes"}}; auto OpenCLFeaturesMap = TI.getSupportedOpenCLOpts(); diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index b647a2fb8a67..5f8e04c2bd6c 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -400,14 +400,18 @@ void TargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { // OpenCL C v3.0 s6.7.5 - The generic address space requires support for // OpenCL C 2.0 or OpenCL C 3.0 with the __opencl_c_generic_address_space // feature - // FIXME: OpenCLGenericAddressSpace is also defined in setLangDefaults() + // OpenCL C v3.0 s6.2.1 - OpenCL pipes require support of OpenCL C 2.0 + // or later and __opencl_c_pipes feature + // FIXME: These language options are also defined in setLangDefaults() // for OpenCL C 2.0 but with no access to target capabilities. Target - // should be immutable once created and thus this language option needs + // should be immutable once created and thus these language options need // to be defined only once. - if (Opts.OpenCLVersion >= 300) { + if (Opts.OpenCLVersion == 300) { const auto &OpenCLFeaturesMap = getSupportedOpenCLOpts(); Opts.OpenCLGenericAddressSpace = hasFeatureEnabled( OpenCLFeaturesMap, "__opencl_c_generic_address_space"); + Opts.OpenCLPipes = + hasFeatureEnabled(OpenCLFeaturesMap, "__opencl_c_pipes"); } } diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 4070ac727d16..e163ebfa2348 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -431,7 +431,8 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const { Feature == "sve2-aes" || Feature == "sve2-sha3" || Feature == "sve2-sm4" || Feature == "f64mm" || Feature == "f32mm" || Feature == "i8mm" || Feature == "bf16") && - (FPU & SveMode)); + (FPU & SveMode)) || + (Feature == "ls64" && HasLS64); } bool AArch64TargetInfo::handleTargetFeatures(std::vector &Features, @@ -752,6 +753,9 @@ bool AArch64TargetInfo::validateConstraintModifier( if (Size == 64) return true; + if (Size == 512) + return HasLS64; + SuggestedModifier = "w"; return false; } diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 244a6e044690..2e580ecf2425 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -310,9 +310,12 @@ public: Opts["cl_khr_mipmap_image"] = true; Opts["cl_khr_mipmap_image_writes"] = true; Opts["cl_khr_subgroups"] = true; - Opts["cl_khr_3d_image_writes"] = true; Opts["cl_amd_media_ops"] = true; Opts["cl_amd_media_ops2"] = true; + + Opts["__opencl_c_images"] = true; + Opts["__opencl_c_3d_image_writes"] = true; + Opts["cl_khr_3d_image_writes"] = true; } } diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index aeb319ca1581..0a3a722fa653 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -2097,7 +2097,8 @@ CodeGenFunction::EmitAsmInputLValue(const TargetInfo::ConstraintInfo &Info, } else { llvm::Type *Ty = ConvertType(InputType); uint64_t Size = CGM.getDataLayout().getTypeSizeInBits(Ty); - if (Size <= 64 && llvm::isPowerOf2_64(Size)) { + if ((Size <= 64 && llvm::isPowerOf2_64(Size)) || + getTargetHooks().isScalarizableAsmOperand(*this, Ty)) { Ty = llvm::IntegerType::get(getLLVMContext(), Size); Ty = llvm::PointerType::getUnqual(Ty); @@ -2320,23 +2321,28 @@ void 
CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // If this is a register output, then make the inline asm return it // by-value. If this is a memory result, return the value by-reference. - bool isScalarizableAggregate = - hasAggregateEvaluationKind(OutExpr->getType()); - if (!Info.allowsMemory() && (hasScalarEvaluationKind(OutExpr->getType()) || - isScalarizableAggregate)) { + QualType QTy = OutExpr->getType(); + const bool IsScalarOrAggregate = hasScalarEvaluationKind(QTy) || + hasAggregateEvaluationKind(QTy); + if (!Info.allowsMemory() && IsScalarOrAggregate) { + Constraints += "=" + OutputConstraint; - ResultRegQualTys.push_back(OutExpr->getType()); + ResultRegQualTys.push_back(QTy); ResultRegDests.push_back(Dest); - ResultTruncRegTypes.push_back(ConvertTypeForMem(OutExpr->getType())); - if (Info.allowsRegister() && isScalarizableAggregate) { - ResultTypeRequiresCast.push_back(true); - unsigned Size = getContext().getTypeSize(OutExpr->getType()); - llvm::Type *ConvTy = llvm::IntegerType::get(getLLVMContext(), Size); - ResultRegTypes.push_back(ConvTy); - } else { - ResultTypeRequiresCast.push_back(false); - ResultRegTypes.push_back(ResultTruncRegTypes.back()); + + llvm::Type *Ty = ConvertTypeForMem(QTy); + const bool RequiresCast = Info.allowsRegister() && + (getTargetHooks().isScalarizableAsmOperand(*this, Ty) || + Ty->isAggregateType()); + + ResultTruncRegTypes.push_back(Ty); + ResultTypeRequiresCast.push_back(RequiresCast); + + if (RequiresCast) { + unsigned Size = getContext().getTypeSize(QTy); + Ty = llvm::IntegerType::get(getLLVMContext(), Size); } + ResultRegTypes.push_back(Ty); // If this output is tied to an input, and if the input is larger, then // we need to set the actual result type of the inline asm node to be the // same as the input type. @@ -2638,11 +2644,11 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { assert(ResultTypeRequiresCast.size() <= ResultRegDests.size()); for (unsigned i = 0, e = RegResults.size(); i != e; ++i) { llvm::Value *Tmp = RegResults[i]; + llvm::Type *TruncTy = ResultTruncRegTypes[i]; // If the result type of the LLVM IR asm doesn't match the result type of // the expression, do the conversion. if (ResultRegTypes[i] != ResultTruncRegTypes[i]) { - llvm::Type *TruncTy = ResultTruncRegTypes[i]; // Truncate the integer result to the right size, note that TruncTy can be // a pointer. @@ -2672,6 +2678,11 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { unsigned Size = getContext().getTypeSize(ResultRegQualTys[i]); Address A = Builder.CreateBitCast(Dest.getAddress(*this), ResultRegTypes[i]->getPointerTo()); + if (getTargetHooks().isScalarizableAsmOperand(*this, TruncTy)) { + Builder.CreateStore(Tmp, A); + continue; + } + QualType Ty = getContext().getIntTypeForBitwidth(Size, /*Signed*/ false); if (Ty.isNull()) { const Expr *OutExpr = S.getOutputExpr(i); diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index a2b68a04d351..d2cc0a699f43 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -5526,6 +5526,20 @@ public: Fn->addFnAttr("branch-target-enforcement", BPI.BranchTargetEnforcement ? 
"true" : "false"); } + + bool isScalarizableAsmOperand(CodeGen::CodeGenFunction &CGF, + llvm::Type *Ty) const override { + if (CGF.getTarget().hasFeature("ls64")) { + auto *ST = dyn_cast(Ty); + if (ST && ST->getNumElements() == 1) { + auto *AT = dyn_cast(ST->getElementType(0)); + if (AT && AT->getNumElements() == 8 && + AT->getElementType()->isIntegerTy(64)) + return true; + } + } + return TargetCodeGenInfo::isScalarizableAsmOperand(CGF, Ty); + } }; class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo { diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index e6e474544fc4..aa8bbb60a75f 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -148,6 +148,13 @@ public: return Ty; } + /// Target hook to decide whether an inline asm operand can be passed + /// by value. + virtual bool isScalarizableAsmOperand(CodeGen::CodeGenFunction &CGF, + llvm::Type *Ty) const { + return false; + } + /// Adds constraints and types for result registers. virtual void addReturnRegisterOutputs( CodeGen::CodeGenFunction &CGF, CodeGen::LValue ReturnValue, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index a4b53a640ab5..1870bd81789c 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -52,9 +52,8 @@ using namespace clang; using namespace llvm::opt; static void CheckPreprocessingOptions(const Driver &D, const ArgList &Args) { - if (Arg *A = Args.getLastArg(clang::driver::options::OPT_C, options::OPT_CC, - options::OPT_fminimize_whitespace, - options::OPT_fno_minimize_whitespace)) { + if (Arg *A = + Args.getLastArg(clang::driver::options::OPT_C, options::OPT_CC)) { if (!Args.hasArg(options::OPT_E) && !Args.hasArg(options::OPT__SLASH_P) && !Args.hasArg(options::OPT__SLASH_EP) && !D.CCCIsCPP()) { D.Diag(clang::diag::err_drv_argument_only_allowed_with) @@ -6068,16 +6067,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_fno_use_line_directives, false)) CmdArgs.push_back("-fuse-line-directives"); - // -fno-minimize-whitespace is default. - if (Args.hasFlag(options::OPT_fminimize_whitespace, - options::OPT_fno_minimize_whitespace, false)) { - types::ID InputType = Inputs[0].getType(); - if (!isDerivedFromC(InputType)) - D.Diag(diag::err_drv_minws_unsupported_input_type) - << types::getTypeName(InputType); - CmdArgs.push_back("-fminimize-whitespace"); - } - // -fms-extensions=0 is default. 
if (Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions, IsWindowsMSVC)) diff --git a/clang/lib/Driver/Types.cpp b/clang/lib/Driver/Types.cpp index 3cb2d6e8f6fd..b7ccdf23cbaa 100644 --- a/clang/lib/Driver/Types.cpp +++ b/clang/lib/Driver/Types.cpp @@ -147,45 +147,6 @@ bool types::isAcceptedByClang(ID Id) { } } -bool types::isDerivedFromC(ID Id) { - switch (Id) { - default: - return false; - - case TY_PP_C: - case TY_C: - case TY_CL: - case TY_CLCXX: - case TY_PP_CUDA: - case TY_CUDA: - case TY_CUDA_DEVICE: - case TY_PP_HIP: - case TY_HIP: - case TY_HIP_DEVICE: - case TY_PP_ObjC: - case TY_PP_ObjC_Alias: - case TY_ObjC: - case TY_PP_CXX: - case TY_CXX: - case TY_PP_ObjCXX: - case TY_PP_ObjCXX_Alias: - case TY_ObjCXX: - case TY_RenderScript: - case TY_PP_CHeader: - case TY_CHeader: - case TY_CLHeader: - case TY_PP_ObjCHeader: - case TY_ObjCHeader: - case TY_PP_CXXHeader: - case TY_CXXHeader: - case TY_PP_ObjCXXHeader: - case TY_ObjCXXHeader: - case TY_CXXModule: - case TY_PP_CXXModule: - return true; - } -} - bool types::isObjC(ID Id) { switch (Id) { default: diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index d545e9358f04..33e5f3e99c45 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3173,7 +3173,7 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK, Opts.ZVector = 0; Opts.setDefaultFPContractMode(LangOptions::FPM_On); Opts.OpenCLCPlusPlus = Opts.CPlusPlus; - Opts.OpenCLPipe = Opts.OpenCLCPlusPlus || Opts.OpenCLVersion == 200; + Opts.OpenCLPipes = Opts.OpenCLCPlusPlus || Opts.OpenCLVersion == 200; Opts.OpenCLGenericAddressSpace = Opts.OpenCLCPlusPlus || Opts.OpenCLVersion == 200; diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp index b7259569595d..24ea1ccba207 100644 --- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp +++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp @@ -95,20 +95,14 @@ private: bool DumpIncludeDirectives; bool UseLineDirectives; bool IsFirstFileEntered; - bool MinimizeWhitespace; - - Token PrevTok; - Token PrevPrevTok; - public: PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream &os, bool lineMarkers, bool defines, bool DumpIncludeDirectives, - bool UseLineDirectives, bool MinimizeWhitespace) + bool UseLineDirectives) : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os), DisableLineMarkers(lineMarkers), DumpDefines(defines), DumpIncludeDirectives(DumpIncludeDirectives), - UseLineDirectives(UseLineDirectives), - MinimizeWhitespace(MinimizeWhitespace) { + UseLineDirectives(UseLineDirectives) { CurLine = 0; CurFilename += ""; EmittedTokensOnThisLine = false; @@ -116,13 +110,8 @@ public: FileType = SrcMgr::C_User; Initialized = false; IsFirstFileEntered = false; - - PrevTok.startToken(); - PrevPrevTok.startToken(); } - bool isMinimizeWhitespace() const { return MinimizeWhitespace; } - void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; } bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; } @@ -131,12 +120,7 @@ public: return EmittedDirectiveOnThisLine; } - /// Ensure that the output stream position is at the beginning of a new line - /// and inserts one if it does not. It is intended to ensure that directives - /// inserted by the directives not from the input source (such as #line) are - /// in the first column. 
To insert newlines that represent the input, use - /// MoveToLine(/*...*/, /*RequireStartOfLine=*/true). - void startNewLineIfNeeded(); + bool startNewLineIfNeeded(bool ShouldUpdateCurrentLine = true); void FileChanged(SourceLocation Loc, FileChangeReason Reason, SrcMgr::CharacteristicKind FileType, @@ -164,45 +148,18 @@ public: void PragmaAssumeNonNullBegin(SourceLocation Loc) override; void PragmaAssumeNonNullEnd(SourceLocation Loc) override; - /// Insert whitespace before emitting the next token. - /// - /// @param Tok Next token to be emitted. - /// @param RequireSpace Ensure at least one whitespace is emitted. Useful - /// if non-tokens have been emitted to the stream. - /// @param RequireSameLine Never emit newlines. Useful when semantics depend - /// on being on the same line, such as directives. - void HandleWhitespaceBeforeTok(const Token &Tok, bool RequireSpace, - bool RequireSameLine); + bool HandleFirstTokOnLine(Token &Tok); /// Move to the line of the provided source location. This will - /// return true if a newline was inserted or if - /// the requested location is the first token on the first line. - /// In these cases the next output will be the first column on the line and - /// make it possible to insert indention. The newline was inserted - /// implicitly when at the beginning of the file. - /// - /// @param Tok Token where to move to. - /// @param RequiresStartOfLine Whether the next line depends on being in the - /// first column, such as a directive. - /// - /// @return Whether column adjustments are necessary. - bool MoveToLine(const Token &Tok, bool RequireStartOfLine) { - PresumedLoc PLoc = SM.getPresumedLoc(Tok.getLocation()); - if (PLoc.isInvalid()) - return false; - bool IsFirstInFile = Tok.isAtStartOfLine() && PLoc.getLine() == 1; - return MoveToLine(PLoc.getLine(), RequireStartOfLine) || IsFirstInFile; - } - - /// Move to the line of the provided source location. Returns true if a new - /// line was inserted. - bool MoveToLine(SourceLocation Loc, bool RequireStartOfLine) { + /// return true if the output stream required adjustment or if + /// the requested location is on the first line. + bool MoveToLine(SourceLocation Loc) { PresumedLoc PLoc = SM.getPresumedLoc(Loc); if (PLoc.isInvalid()) return false; - return MoveToLine(PLoc.getLine(), RequireStartOfLine); + return MoveToLine(PLoc.getLine()) || (PLoc.getLine() == 1); } - bool MoveToLine(unsigned LineNo, bool RequireStartOfLine); + bool MoveToLine(unsigned LineNo); bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok, const Token &Tok) { @@ -230,7 +187,7 @@ public: void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo, const char *Extra, unsigned ExtraLen) { - startNewLineIfNeeded(); + startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false); // Emit #line directives or GNU line markers depending on what mode we're in. if (UseLineDirectives) { @@ -257,57 +214,43 @@ void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo, /// object. We can do this by emitting some number of \n's, or be emitting a /// #line directive. This returns false if already at the specified line, true /// if some newlines were emitted. -bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo, - bool RequireStartOfLine) { - // If it is required to start a new line or finish the current, insert - // vertical whitespace now and take it into account when moving to the - // expected line. 
- bool StartedNewLine = false; - if ((RequireStartOfLine && EmittedTokensOnThisLine) || - EmittedDirectiveOnThisLine) { - OS << '\n'; - StartedNewLine = true; - CurLine += 1; - EmittedTokensOnThisLine = false; - EmittedDirectiveOnThisLine = false; - } - +bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo) { // If this line is "close enough" to the original line, just print newlines, // otherwise print a #line directive. - if (CurLine == LineNo) { - // Nothing to do if we are already on the correct line. - } else if (!StartedNewLine && (!MinimizeWhitespace || !DisableLineMarkers) && - LineNo - CurLine == 1) { - // Printing a single line has priority over printing a #line directive, even - // when minimizing whitespace which otherwise would print #line directives - // for every single line. - OS << '\n'; - StartedNewLine = true; - } else if (!MinimizeWhitespace && LineNo - CurLine <= 8) { - const char *NewLines = "\n\n\n\n\n\n\n\n"; - OS.write(NewLines, LineNo - CurLine); - StartedNewLine = true; + if (LineNo-CurLine <= 8) { + if (LineNo-CurLine == 1) + OS << '\n'; + else if (LineNo == CurLine) + return false; // Spelling line moved, but expansion line didn't. + else { + const char *NewLines = "\n\n\n\n\n\n\n\n"; + OS.write(NewLines, LineNo-CurLine); + } } else if (!DisableLineMarkers) { // Emit a #line or line marker. WriteLineInfo(LineNo, nullptr, 0); - StartedNewLine = true; - } - - if (StartedNewLine) { - EmittedTokensOnThisLine = false; - EmittedDirectiveOnThisLine = false; + } else { + // Okay, we're in -P mode, which turns off line markers. However, we still + // need to emit a newline between tokens on different lines. + startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false); } CurLine = LineNo; - return StartedNewLine; + return true; } -void PrintPPOutputPPCallbacks::startNewLineIfNeeded() { +bool +PrintPPOutputPPCallbacks::startNewLineIfNeeded(bool ShouldUpdateCurrentLine) { if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) { OS << '\n'; EmittedTokensOnThisLine = false; EmittedDirectiveOnThisLine = false; + if (ShouldUpdateCurrentLine) + ++CurLine; + return true; } + + return false; } /// FileChanged - Whenever the preprocessor enters or exits a #include file @@ -330,7 +273,7 @@ void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc, if (Reason == PPCallbacks::EnterFile) { SourceLocation IncludeLoc = UserLoc.getIncludeLoc(); if (IncludeLoc.isValid()) - MoveToLine(IncludeLoc, /*RequireStartOfLine=*/false); + MoveToLine(IncludeLoc); } else if (Reason == PPCallbacks::SystemHeaderPragma) { // GCC emits the # directive for this directive on the line AFTER the // directive and emits a bunch of spaces that aren't needed. This is because @@ -347,8 +290,7 @@ void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc, FileType = NewFileType; if (DisableLineMarkers) { - if (!MinimizeWhitespace) - startNewLineIfNeeded(); + startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false); return; } @@ -394,13 +336,15 @@ void PrintPPOutputPPCallbacks::InclusionDirective( // In -dI mode, dump #include directives prior to dumping their content or // interpretation. if (DumpIncludeDirectives) { - MoveToLine(HashLoc, /*RequireStartOfLine=*/true); + startNewLineIfNeeded(); + MoveToLine(HashLoc); const std::string TokenText = PP.getSpelling(IncludeTok); assert(!TokenText.empty()); OS << "#" << TokenText << " " << (IsAngled ? '<' : '"') << FileName << (IsAngled ? 
'>' : '"') << " /* clang -E -dI */"; setEmittedDirectiveOnThisLine(); + startNewLineIfNeeded(); } // When preprocessing, turn implicit imports into module import pragmas. @@ -409,13 +353,17 @@ void PrintPPOutputPPCallbacks::InclusionDirective( case tok::pp_include: case tok::pp_import: case tok::pp_include_next: - MoveToLine(HashLoc, /*RequireStartOfLine=*/true); + startNewLineIfNeeded(); + MoveToLine(HashLoc); OS << "#pragma clang module import " << Imported->getFullModuleName(true) << " /* clang -E: implicit import for " << "#" << PP.getSpelling(IncludeTok) << " " << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"') << " */"; - setEmittedDirectiveOnThisLine(); + // Since we want a newline after the pragma, but not a #, start a + // new line immediately. + EmittedTokensOnThisLine = true; + startNewLineIfNeeded(); break; case tok::pp___include_macros: @@ -450,11 +398,11 @@ void PrintPPOutputPPCallbacks::EndModule(const Module *M) { /// Ident - Handle #ident directives when read by the preprocessor. /// void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) { - MoveToLine(Loc, /*RequireStartOfLine=*/true); + MoveToLine(Loc); OS.write("#ident ", strlen("#ident ")); OS.write(S.begin(), S.size()); - setEmittedTokensOnThisLine(); + EmittedTokensOnThisLine = true; } /// MacroDefined - This hook is called whenever a macro definition is seen. @@ -466,7 +414,7 @@ void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok, // Ignore __FILE__ etc. MI->isBuiltinMacro()) return; - MoveToLine(MI->getDefinitionLoc(), /*RequireStartOfLine=*/true); + MoveToLine(MI->getDefinitionLoc()); PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS); setEmittedDirectiveOnThisLine(); } @@ -477,7 +425,7 @@ void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok, // Only print out macro definitions in -dD mode. 
if (!DumpDefines) return; - MoveToLine(MacroNameTok.getLocation(), /*RequireStartOfLine=*/true); + MoveToLine(MacroNameTok.getLocation()); OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName(); setEmittedDirectiveOnThisLine(); } @@ -498,7 +446,8 @@ void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc, StringRef Namespace, PragmaMessageKind Kind, StringRef Str) { - MoveToLine(Loc, /*RequireStartOfLine=*/true); + startNewLineIfNeeded(); + MoveToLine(Loc); OS << "#pragma "; if (!Namespace.empty()) OS << Namespace << ' '; @@ -523,7 +472,8 @@ void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc, void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc, StringRef DebugType) { - MoveToLine(Loc, /*RequireStartOfLine=*/true); + startNewLineIfNeeded(); + MoveToLine(Loc); OS << "#pragma clang __debug "; OS << DebugType; @@ -533,14 +483,16 @@ void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc, void PrintPPOutputPPCallbacks:: PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) { - MoveToLine(Loc, /*RequireStartOfLine=*/true); + startNewLineIfNeeded(); + MoveToLine(Loc); OS << "#pragma " << Namespace << " diagnostic push"; setEmittedDirectiveOnThisLine(); } void PrintPPOutputPPCallbacks:: PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) { - MoveToLine(Loc, /*RequireStartOfLine=*/true); + startNewLineIfNeeded(); + MoveToLine(Loc); OS << "#pragma " << Namespace << " diagnostic pop"; setEmittedDirectiveOnThisLine(); } @@ -549,7 +501,8 @@ void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc, StringRef Namespace, diag::Severity Map, StringRef Str) { - MoveToLine(Loc, /*RequireStartOfLine=*/true); + startNewLineIfNeeded(); + MoveToLine(Loc); OS << "#pragma " << Namespace << " diagnostic "; switch (Map) { case diag::Severity::Remark: @@ -575,7 +528,8 @@ void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc, void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc, StringRef WarningSpec, ArrayRef Ids) { - MoveToLine(Loc, /*RequireStartOfLine=*/true); + startNewLineIfNeeded(); + MoveToLine(Loc); OS << "#pragma warning(" << WarningSpec << ':'; for (ArrayRef::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I) OS << ' ' << *I; @@ -585,7 +539,8 @@ void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc, void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc, int Level) { - MoveToLine(Loc, /*RequireStartOfLine=*/true); + startNewLineIfNeeded(); + MoveToLine(Loc); OS << "#pragma warning(push"; if (Level >= 0) OS << ", " << Level; @@ -594,14 +549,16 @@ void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc, } void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) { - MoveToLine(Loc, /*RequireStartOfLine=*/true); + startNewLineIfNeeded(); + MoveToLine(Loc); OS << "#pragma warning(pop)"; setEmittedDirectiveOnThisLine(); } void PrintPPOutputPPCallbacks::PragmaExecCharsetPush(SourceLocation Loc, StringRef Str) { - MoveToLine(Loc, /*RequireStartOfLine=*/true); + startNewLineIfNeeded(); + MoveToLine(Loc); OS << "#pragma character_execution_set(push"; if (!Str.empty()) OS << ", " << Str; @@ -610,80 +567,64 @@ void PrintPPOutputPPCallbacks::PragmaExecCharsetPush(SourceLocation Loc, } void PrintPPOutputPPCallbacks::PragmaExecCharsetPop(SourceLocation Loc) { - MoveToLine(Loc, /*RequireStartOfLine=*/true); + startNewLineIfNeeded(); + MoveToLine(Loc); OS << "#pragma character_execution_set(pop)"; setEmittedDirectiveOnThisLine(); } void PrintPPOutputPPCallbacks:: 
PragmaAssumeNonNullBegin(SourceLocation Loc) { - MoveToLine(Loc, /*RequireStartOfLine=*/true); + startNewLineIfNeeded(); + MoveToLine(Loc); OS << "#pragma clang assume_nonnull begin"; setEmittedDirectiveOnThisLine(); } void PrintPPOutputPPCallbacks:: PragmaAssumeNonNullEnd(SourceLocation Loc) { - MoveToLine(Loc, /*RequireStartOfLine=*/true); + startNewLineIfNeeded(); + MoveToLine(Loc); OS << "#pragma clang assume_nonnull end"; setEmittedDirectiveOnThisLine(); } -void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok, - bool RequireSpace, - bool RequireSameLine) { - // These tokens are not expanded to anything and don't need whitespace before - // them. - if (Tok.is(tok::eof) || - (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) && - !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end))) - return; +/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this +/// is called for the first token on each new line. If this really is the start +/// of a new logical line, handle it and return true, otherwise return false. +/// This may not be the start of a logical line because the "start of line" +/// marker is set for spelling lines, not expansion ones. +bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) { + // Figure out what line we went to and insert the appropriate number of + // newline characters. + if (!MoveToLine(Tok.getLocation())) + return false; - if (!RequireSameLine && MoveToLine(Tok, /*RequireStartOfLine=*/false)) { - if (MinimizeWhitespace) { - // Avoid interpreting hash as a directive under -fpreprocessed. - if (Tok.is(tok::hash)) - OS << ' '; - } else { - // Print out space characters so that the first token on a line is - // indented for easy reading. - unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation()); + // Print out space characters so that the first token on a line is + // indented for easy reading. + unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation()); - // The first token on a line can have a column number of 1, yet still - // expect leading white space, if a macro expansion in column 1 starts - // with an empty macro argument, or an empty nested macro expansion. In - // this case, move the token to column 2. - if (ColNo == 1 && Tok.hasLeadingSpace()) - ColNo = 2; + // The first token on a line can have a column number of 1, yet still expect + // leading white space, if a macro expansion in column 1 starts with an empty + // macro argument, or an empty nested macro expansion. In this case, move the + // token to column 2. + if (ColNo == 1 && Tok.hasLeadingSpace()) + ColNo = 2; - // This hack prevents stuff like: - // #define HASH # - // HASH define foo bar - // From having the # character end up at column 1, which makes it so it - // is not handled as a #define next time through the preprocessor if in - // -fpreprocessed mode. - if (ColNo <= 1 && Tok.is(tok::hash)) - OS << ' '; + // This hack prevents stuff like: + // #define HASH # + // HASH define foo bar + // From having the # character end up at column 1, which makes it so it + // is not handled as a #define next time through the preprocessor if in + // -fpreprocessed mode. + if (ColNo <= 1 && Tok.is(tok::hash)) + OS << ' '; - // Otherwise, indent the appropriate number of spaces. 
- for (; ColNo > 1; --ColNo) - OS << ' '; - } - } else { - // Insert whitespace between the previous and next token if either - // - The caller requires it - // - The input had whitespace between them and we are not in - // whitespace-minimization mode - // - The whitespace is necessary to keep the tokens apart and there is not - // already a newline between them - if (RequireSpace || (!MinimizeWhitespace && Tok.hasLeadingSpace()) || - ((EmittedTokensOnThisLine || EmittedTokensOnThisLine) && - AvoidConcat(PrevPrevTok, PrevTok, Tok))) - OS << ' '; - } + // Otherwise, indent the appropriate number of spaces. + for (; ColNo > 1; --ColNo) + OS << ' '; - PrevPrevTok = PrevTok; - PrevTok = Tok; + return true; } void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr, @@ -727,9 +668,9 @@ struct UnknownPragmaHandler : public PragmaHandler { Token &PragmaTok) override { // Figure out what line we went to and insert the appropriate number of // newline characters. - Callbacks->MoveToLine(PragmaTok.getLocation(), /*RequireStartOfLine=*/true); + Callbacks->startNewLineIfNeeded(); + Callbacks->MoveToLine(PragmaTok.getLocation()); Callbacks->OS.write(Prefix, strlen(Prefix)); - Callbacks->setEmittedTokensOnThisLine(); if (ShouldExpandTokens) { // The first token does not have expanded macros. Expand them, if @@ -741,16 +682,21 @@ struct UnknownPragmaHandler : public PragmaHandler { /*IsReinject=*/false); PP.Lex(PragmaTok); } + Token PrevToken; + Token PrevPrevToken; + PrevToken.startToken(); + PrevPrevToken.startToken(); // Read and print all of the pragma tokens. - bool IsFirst = true; while (PragmaTok.isNot(tok::eod)) { - Callbacks->HandleWhitespaceBeforeTok(PragmaTok, /*RequireSpace=*/IsFirst, - /*RequireSameLine=*/true); - IsFirst = false; + if (PragmaTok.hasLeadingSpace() || + Callbacks->AvoidConcat(PrevPrevToken, PrevToken, PragmaTok)) + Callbacks->OS << ' '; std::string TokSpell = PP.getSpelling(PragmaTok); Callbacks->OS.write(&TokSpell[0], TokSpell.size()); - Callbacks->setEmittedTokensOnThisLine(); + + PrevPrevToken = PrevToken; + PrevToken = PragmaTok; if (ShouldExpandTokens) PP.Lex(PragmaTok); @@ -769,41 +715,44 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, bool DropComments = PP.getLangOpts().TraditionalCPP && !PP.getCommentRetentionState(); - bool IsStartOfLine = false; char Buffer[256]; + Token PrevPrevTok, PrevTok; + PrevPrevTok.startToken(); + PrevTok.startToken(); while (1) { - // Two lines joined with line continuation ('\' as last character on the - // line) must be emitted as one line even though Tok.getLine() returns two - // different values. In this situation Tok.isAtStartOfLine() is false even - // though it may be the first token on the lexical line. When - // dropping/skipping a token that is at the start of a line, propagate the - // start-of-line-ness to the next token to not append it to the previous - // line. - IsStartOfLine = IsStartOfLine || Tok.isAtStartOfLine(); + if (Callbacks->hasEmittedDirectiveOnThisLine()) { + Callbacks->startNewLineIfNeeded(); + Callbacks->MoveToLine(Tok.getLocation()); + } - Callbacks->HandleWhitespaceBeforeTok(Tok, /*RequireSpace=*/false, - /*RequireSameLine=*/!IsStartOfLine); + // If this token is at the start of a line, emit newlines if needed. + if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) { + // done. + } else if (Tok.hasLeadingSpace() || + // If we haven't emitted a token on this line yet, PrevTok isn't + // useful to look at and no concatenation could happen anyway. 
+ (Callbacks->hasEmittedTokensOnThisLine() && + // Don't print "-" next to "-", it would form "--". + Callbacks->AvoidConcat(PrevPrevTok, PrevTok, Tok))) { + OS << ' '; + } if (DropComments && Tok.is(tok::comment)) { // Skip comments. Normally the preprocessor does not generate // tok::comment nodes at all when not keeping comments, but under // -traditional-cpp the lexer keeps /all/ whitespace, including comments. - PP.Lex(Tok); - continue; + SourceLocation StartLoc = Tok.getLocation(); + Callbacks->MoveToLine(StartLoc.getLocWithOffset(Tok.getLength())); } else if (Tok.is(tok::eod)) { // Don't print end of directive tokens, since they are typically newlines // that mess up our line tracking. These come from unknown pre-processor // directives or hash-prefixed comments in standalone assembly files. PP.Lex(Tok); - // FIXME: The token on the next line after #include should have - // Tok.isAtStartOfLine() set. - IsStartOfLine = true; continue; } else if (Tok.is(tok::annot_module_include)) { // PrintPPOutputPPCallbacks::InclusionDirective handles producing // appropriate output here. Ignore this token entirely. PP.Lex(Tok); - IsStartOfLine = true; continue; } else if (Tok.is(tok::annot_module_begin)) { // FIXME: We retrieve this token after the FileChanged callback, and @@ -815,13 +764,11 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, Callbacks->BeginModule( reinterpret_cast(Tok.getAnnotationValue())); PP.Lex(Tok); - IsStartOfLine = true; continue; } else if (Tok.is(tok::annot_module_end)) { Callbacks->EndModule( reinterpret_cast(Tok.getAnnotationValue())); PP.Lex(Tok); - IsStartOfLine = true; continue; } else if (Tok.is(tok::annot_header_unit)) { // This is a header-name that has been (effectively) converted into a @@ -849,17 +796,8 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, // Tokens that can contain embedded newlines need to adjust our current // line number. - // FIXME: The token may end with a newline in which case - // setEmittedDirectiveOnThisLine/setEmittedTokensOnThisLine afterwards is - // wrong. if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown) Callbacks->HandleNewlinesInToken(TokPtr, Len); - if (Tok.is(tok::comment) && Len >= 2 && TokPtr[0] == '/' && - TokPtr[1] == '/') { - // It's a line comment; - // Ensure that we don't concatenate anything behind it. - Callbacks->setEmittedDirectiveOnThisLine(); - } } else { std::string S = PP.getSpelling(Tok); OS.write(S.data(), S.size()); @@ -868,17 +806,13 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, // line number. if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown) Callbacks->HandleNewlinesInToken(S.data(), S.size()); - if (Tok.is(tok::comment) && S.size() >= 2 && S[0] == '/' && S[1] == '/') { - // It's a line comment; - // Ensure that we don't concatenate anything behind it. - Callbacks->setEmittedDirectiveOnThisLine(); - } } Callbacks->setEmittedTokensOnThisLine(); - IsStartOfLine = false; if (Tok.is(tok::eof)) break; + PrevPrevTok = PrevTok; + PrevTok = Tok; PP.Lex(Tok); } } @@ -936,8 +870,7 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS, PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks( PP, *OS, !Opts.ShowLineMarkers, Opts.ShowMacros, - Opts.ShowIncludeDirectives, Opts.UseLineDirectives, - Opts.MinimizeWhitespace); + Opts.ShowIncludeDirectives, Opts.UseLineDirectives); // Expand macros in pragmas with -fms-extensions. 
The assumption is that // the majority of pragmas in such a file will be Microsoft pragmas. diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 3034af231e0e..64944492eb99 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2811,11 +2811,11 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { ConditionalStack.pop_back(); } + SourceLocation EndLoc = getSourceLocation(BufferEnd); // C99 5.1.1.2p2: If the file is non-empty and didn't end in a newline, issue // a pedwarn. if (CurPtr != BufferStart && (CurPtr[-1] != '\n' && CurPtr[-1] != '\r')) { DiagnosticsEngine &Diags = PP->getDiagnostics(); - SourceLocation EndLoc = getSourceLocation(BufferEnd); unsigned DiagID; if (LangOpts.CPlusPlus11) { @@ -2838,7 +2838,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { BufferPtr = CurPtr; // Finally, let the preprocessor handle this. - return PP->HandleEndOfFile(Result, isPragmaLexer()); + return PP->HandleEndOfFile(Result, EndLoc, isPragmaLexer()); } /// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 556dd8daf652..3fa8746653b0 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -2022,6 +2022,10 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile, LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled); + // Record the header's filename for later use. + if (File) + CurLexer->addInclude(OriginalFilename, File->getFileEntry(), FilenameLoc); + if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) { if (File && isPCHThroughHeader(&File->getFileEntry())) SkippingUntilPCHThroughHeader = false; diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp index b979b965f46a..16170969a322 100644 --- a/clang/lib/Lex/PPLexerChange.cpp +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/LexDiagnostic.h" @@ -22,6 +23,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBufferRef.h" #include "llvm/Support/Path.h" + using namespace clang; //===----------------------------------------------------------------------===// @@ -299,10 +301,46 @@ void Preprocessor::diagnoseMissingHeaderInUmbrellaDir(const Module &Mod) { } } +void Preprocessor::ResolvePragmaIncludeInstead( + const SourceLocation Location) const { + assert(Location.isValid()); + if (CurLexer == nullptr) + return; + + if (SourceMgr.isInSystemHeader(Location)) + return; + + for (const auto &Include : CurLexer->getIncludeHistory()) { + StringRef Filename = Include.getKey(); + const PreprocessorLexer::IncludeInfo &Info = Include.getValue(); + ArrayRef> Aliases = + HeaderInfo.getFileInfo(Info.File).Aliases.getArrayRef(); + + if (Aliases.empty()) + continue; + + switch (Aliases.size()) { + case 1: + Diag(Info.Location, diag::err_pragma_include_instead_system_reserved) + << Filename << 0 << Aliases[0]; + continue; + case 2: + Diag(Info.Location, diag::err_pragma_include_instead_system_reserved) + << Filename << 1 << Aliases[0] << Aliases[1]; + continue; + default: { + Diag(Info.Location, diag::err_pragma_include_instead_system_reserved) + << Filename << 2 << ("{'" + 
llvm::join(Aliases, "', '") + "'}"); + } + } + } +} + /// HandleEndOfFile - This callback is invoked when the lexer hits the end of /// the current file. This either returns the EOF token or pops a level off /// the include stack and keeps going. -bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { +bool Preprocessor::HandleEndOfFile(Token &Result, SourceLocation EndLoc, + bool isEndOfMacro) { assert(!CurTokenLexer && "Ending a file when currently in a macro!"); @@ -372,6 +410,9 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { } } + if (EndLoc.isValid()) + ResolvePragmaIncludeInstead(EndLoc); + // Complain about reaching a true EOF within arc_cf_code_audited. // We don't want to complain about reaching the end of a macro // instantiation or a _Pragma. @@ -560,7 +601,7 @@ bool Preprocessor::HandleEndOfTokenLexer(Token &Result) { TokenLexerCache[NumCachedTokenLexers++] = std::move(CurTokenLexer); // Handle this like a #include file being popped off the stack. - return HandleEndOfFile(Result, true); + return HandleEndOfFile(Result, {}, true); } /// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp index c89061ba6d02..27765af34fed 100644 --- a/clang/lib/Lex/Pragma.cpp +++ b/clang/lib/Lex/Pragma.cpp @@ -13,6 +13,7 @@ #include "clang/Lex/Pragma.h" #include "clang/Basic/Diagnostic.h" +#include "clang/Basic/DiagnosticLex.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/LLVM.h" @@ -35,11 +36,12 @@ #include "clang/Lex/TokenLexer.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Timer.h" @@ -495,43 +497,88 @@ void Preprocessor::HandlePragmaSystemHeader(Token &SysHeaderTok) { SrcMgr::C_System); } -/// HandlePragmaDependency - Handle \#pragma GCC dependency "foo" blah. -void Preprocessor::HandlePragmaDependency(Token &DependencyTok) { +static llvm::Optional LexHeader(Preprocessor &PP, + Optional &File, + bool SuppressIncludeNotFoundError) { Token FilenameTok; - if (LexHeaderName(FilenameTok, /*AllowConcatenation*/false)) - return; + if (PP.LexHeaderName(FilenameTok, /*AllowConcatenation*/ false)) + return llvm::None; // If the next token wasn't a header-name, diagnose the error. if (FilenameTok.isNot(tok::header_name)) { - Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); - return; + PP.Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); + return llvm::None; } // Reserve a buffer to get the spelling. SmallString<128> FilenameBuffer; bool Invalid = false; - StringRef Filename = getSpelling(FilenameTok, FilenameBuffer, &Invalid); + StringRef Filename = PP.getSpelling(FilenameTok, FilenameBuffer, &Invalid); if (Invalid) - return; + return llvm::None; bool isAngled = - GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename); + PP.GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename); // If GetIncludeFilenameSpelling set the start ptr to null, there was an // error. if (Filename.empty()) - return; + return llvm::None; // Search include directories for this file. 
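  // This helper serves pragmas that carry a header-name operand, e.g.
  //   #pragma GCC dependency "parser.y"
  //   #pragma clang include_instead(<public_header.h>)
  // (illustrative directives only); the operand is resolved through the
  // same search path a #include would use.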
const DirectoryLookup *CurDir; - Optional File = - LookupFile(FilenameTok.getLocation(), Filename, isAngled, nullptr, - nullptr, CurDir, nullptr, nullptr, nullptr, nullptr, nullptr); + File = PP.LookupFile(FilenameTok.getLocation(), Filename, isAngled, nullptr, + nullptr, CurDir, nullptr, nullptr, nullptr, nullptr, + nullptr); if (!File) { if (!SuppressIncludeNotFoundError) - Diag(FilenameTok, diag::err_pp_file_not_found) << Filename; + PP.Diag(FilenameTok, diag::err_pp_file_not_found) << Filename; + return llvm::None; + } + + return FilenameTok; +} + +/// HandlePragmaIncludeInstead - Handle \#pragma clang include_instead(header). +void Preprocessor::HandlePragmaIncludeInstead(Token &Tok) { + // Get the current file lexer we're looking at. Ignore _Pragma 'files' etc. + PreprocessorLexer *TheLexer = getCurrentFileLexer(); + + if (!SourceMgr.isInSystemHeader(Tok.getLocation())) { + Diag(Tok, diag::err_pragma_include_instead_not_sysheader); return; } + Lex(Tok); + if (Tok.isNot(tok::l_paren)) { + Diag(Tok, diag::err_expected) << "("; + return; + } + + Optional File; + llvm::Optional FilenameTok = + LexHeader(*this, File, SuppressIncludeNotFoundError); + if (!FilenameTok) + return; + + Lex(Tok); + if (Tok.isNot(tok::r_paren)) { + Diag(Tok, diag::err_expected) << ")"; + return; + } + + SmallString<128> FilenameBuffer; + StringRef Filename = getSpelling(*FilenameTok, FilenameBuffer); + HeaderInfo.AddFileAlias(TheLexer->getFileEntry(), Filename); +} + +/// HandlePragmaDependency - Handle \#pragma GCC dependency "foo" blah. +void Preprocessor::HandlePragmaDependency(Token &DependencyTok) { + Optional File; + llvm::Optional FilenameTok = + LexHeader(*this, File, SuppressIncludeNotFoundError); + if (!FilenameTok) + return; + const FileEntry *CurFile = getCurrentFileLexer()->getFileEntry(); // If this file is older than the file it depends on, emit a diagnostic. @@ -547,7 +594,7 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) { // Remove the trailing ' ' if present. if (!Message.empty()) Message.erase(Message.end()-1); - Diag(FilenameTok, diag::pp_out_of_date_dependency) << Message; + Diag(*FilenameTok, diag::pp_out_of_date_dependency) << Message; } } @@ -1022,6 +1069,18 @@ struct PragmaSystemHeaderHandler : public PragmaHandler { } }; +/// PragmaIncludeInsteadHandler - "\#pragma clang include_instead(header)" marks +/// the current file as non-includable if the including header is not a system +/// header. +struct PragmaIncludeInsteadHandler : public PragmaHandler { + PragmaIncludeInsteadHandler() : PragmaHandler("include_instead") {} + + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, + Token &IIToken) override { + PP.HandlePragmaIncludeInstead(IIToken); + } +}; + struct PragmaDependencyHandler : public PragmaHandler { PragmaDependencyHandler() : PragmaHandler("dependency") {} @@ -1934,6 +1993,7 @@ void Preprocessor::RegisterBuiltinPragmas() { // #pragma clang ... 
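  // Sketch of the handler registered below: a private system header can
  // redirect its users with
  //   #pragma clang include_instead(<public_header.h>)
  // so that a direct #include of the private header from user code is
  // diagnosed with the suggested replacement(s).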
AddPragmaHandler("clang", new PragmaPoisonHandler()); AddPragmaHandler("clang", new PragmaSystemHeaderHandler()); + AddPragmaHandler("clang", new PragmaIncludeInsteadHandler()); AddPragmaHandler("clang", new PragmaDebugHandler()); AddPragmaHandler("clang", new PragmaDependencyHandler()); AddPragmaHandler("clang", new PragmaDiagnosticHandler("clang")); diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 32ea8791d29a..e376fff90432 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -716,12 +716,6 @@ IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { } // Update the token info (identifier info and appropriate token kind). - // FIXME: the raw_identifier may contain leading whitespace which is removed - // from the cleaned identifier token. The SourceLocation should be updated to - // refer to the non-whitespace character. For instance, the text "\\\nB" (a - // line continuation before 'B') is parsed as a single tok::raw_identifier and - // is cleaned to tok::identifier "B". After cleaning the token's length is - // still 3 and the SourceLocation refers to the location of the backslash. Identifier.setIdentifierInfo(II); if (getLangOpts().MSVCCompat && II->isCPlusPlusOperatorKeyword() && getSourceManager().isInSystemHeader(Identifier.getLocation())) diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index f4f5f461e3b6..939323517b4d 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -3952,8 +3952,12 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, Tok.getIdentifierInfo()->revertTokenIDToIdentifier(); Tok.setKind(tok::identifier); goto DoneWithDeclSpec; - } - isInvalid = DS.SetTypePipe(true, Loc, PrevSpec, DiagID, Policy); + } else if (!getLangOpts().OpenCLPipes) { + DiagID = diag::err_opencl_unknown_type_specifier; + PrevSpec = Tok.getIdentifierInfo()->getNameStart(); + isInvalid = true; + } else + isInvalid = DS.SetTypePipe(true, Loc, PrevSpec, DiagID, Policy); break; // We only need to enumerate each image type once. #define IMAGE_READ_WRITE_TYPE(Type, Id, Ext) @@ -5126,8 +5130,10 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { switch (Tok.getKind()) { default: return false; + // OpenCL 2.0 and later define this keyword. case tok::kw_pipe: - return getLangOpts().OpenCLPipe; + return (getLangOpts().OpenCL && getLangOpts().OpenCLVersion >= 200) || + getLangOpts().OpenCLCPlusPlus; case tok::identifier: // foo::bar // Unfortunate hack to support "Class.factoryMethod" notation. @@ -5656,7 +5662,9 @@ static bool isPtrOperatorToken(tok::TokenKind Kind, const LangOptions &Lang, if (Kind == tok::star || Kind == tok::caret) return true; - if (Kind == tok::kw_pipe && Lang.OpenCLPipe) + // OpenCL 2.0 and later define this keyword. 
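  // For example (OpenCL C 2.0):
  //   kernel void k(read_only pipe int p);
  // 'pipe' must be accepted here alongside '*' and '^' when parsing the
  // declarator.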
+ if (Kind == tok::kw_pipe && + ((Lang.OpenCL && Lang.OpenCLVersion >= 200) || Lang.OpenCLCPlusPlus)) return true; if (!Lang.CPlusPlus) diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 5d3de06e9576..a54bd8719178 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -327,7 +327,8 @@ void Sema::Initialize() { if (getLangOpts().OpenCLCPlusPlus || getLangOpts().OpenCLVersion >= 200) { addImplicitTypedef("clk_event_t", Context.OCLClkEventTy); addImplicitTypedef("queue_t", Context.OCLQueueTy); - addImplicitTypedef("reserve_id_t", Context.OCLReserveIDTy); + if (getLangOpts().OpenCLPipes) + addImplicitTypedef("reserve_id_t", Context.OCLReserveIDTy); addImplicitTypedef("atomic_int", Context.getAtomicType(Context.IntTy)); addImplicitTypedef("atomic_uint", Context.getAtomicType(Context.UnsignedIntTy)); diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index b78331cdfe91..bca21b351c91 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -1525,18 +1525,20 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) { break; case DeclSpec::TST_float: Result = Context.FloatTy; break; case DeclSpec::TST_double: - if (S.getLangOpts().OpenCL) { - if (!S.getOpenCLOptions().isSupported("cl_khr_fp64", S.getLangOpts())) - S.Diag(DS.getTypeSpecTypeLoc(), - diag::err_opencl_double_requires_extension) - << (S.getLangOpts().OpenCLVersion >= 300); - else if (!S.getOpenCLOptions().isAvailableOption("cl_khr_fp64", S.getLangOpts())) - S.Diag(DS.getTypeSpecTypeLoc(), diag::ext_opencl_double_without_pragma); - } if (DS.getTypeSpecWidth() == TypeSpecifierWidth::Long) Result = Context.LongDoubleTy; else Result = Context.DoubleTy; + if (S.getLangOpts().OpenCL) { + if (!S.getOpenCLOptions().isSupported("cl_khr_fp64", S.getLangOpts())) + S.Diag(DS.getTypeSpecTypeLoc(), diag::err_opencl_requires_extension) + << 0 << Result + << (S.getLangOpts().OpenCLVersion == 300 + ? "cl_khr_fp64 and __opencl_c_fp64" + : "cl_khr_fp64"); + else if (!S.getOpenCLOptions().isAvailableOption("cl_khr_fp64", S.getLangOpts())) + S.Diag(DS.getTypeSpecTypeLoc(), diag::ext_opencl_double_without_pragma); + } break; case DeclSpec::TST_float128: if (!S.Context.getTargetInfo().hasFloat128Type() && @@ -1724,21 +1726,28 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) { if (S.getLangOpts().OpenCL) { const auto &OpenCLOptions = S.getOpenCLOptions(); - StringRef OptName; + bool IsOpenCLC30 = (S.getLangOpts().OpenCLVersion == 300); // OpenCL C v3.0 s6.3.3 - OpenCL image types require __opencl_c_images - // support + // support. + // OpenCL C v3.0 s6.2.1 - OpenCL 3d image write types requires support + // for OpenCL C 2.0, or OpenCL C 3.0 or newer and the + // __opencl_c_3d_image_writes feature. 
OpenCL C v3.0 API s4.2 - For devices + // that support OpenCL 3.0, cl_khr_3d_image_writes must be returned when and + // only when the optional feature is supported if ((Result->isImageType() || Result->isSamplerT()) && - (S.getLangOpts().OpenCLVersion >= 300 && - !OpenCLOptions.isSupported("__opencl_c_images", S.getLangOpts()))) - OptName = "__opencl_c_images"; - else if (Result->isOCLImage3dWOType() && - !OpenCLOptions.isSupported("cl_khr_3d_image_writes", - S.getLangOpts())) - OptName = "cl_khr_3d_image_writes"; - - if (!OptName.empty()) { + (IsOpenCLC30 && + !OpenCLOptions.isSupported("__opencl_c_images", S.getLangOpts()))) { S.Diag(DS.getTypeSpecTypeLoc(), diag::err_opencl_requires_extension) - << 0 << Result << OptName; + << 0 << Result << "__opencl_c_images"; + declarator.setInvalidType(); + } else if (Result->isOCLImage3dWOType() && + !OpenCLOptions.isSupported("cl_khr_3d_image_writes", + S.getLangOpts())) { + S.Diag(DS.getTypeSpecTypeLoc(), diag::err_opencl_requires_extension) + << 0 << Result + << (IsOpenCLC30 + ? "cl_khr_3d_image_writes and __opencl_c_3d_image_writes" + : "cl_khr_3d_image_writes"); declarator.setInvalidType(); } } diff --git a/libcxx/include/format b/libcxx/include/format index 0ec4b85ca0a5..cfd851aa9a3d 100644 --- a/libcxx/include/format +++ b/libcxx/include/format @@ -55,14 +55,14 @@ namespace std { */ +// Make sure all feature tests macros are always available. +#include +// Only enable the contents of the header when libc++ was build with LIBCXX_ENABLE_INCOMPLETE_FEATURES enabled +#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_FORMAT) + #include <__config> #include <__format/format_error.h> #include <__format/format_parse_context.h> -#include - -#if defined(_LIBCPP_HAS_NO_INCOMPLETE_FORMAT) -# error "The Format library is not supported since libc++ has been configured with LIBCXX_ENABLE_INCOMPLETE_FEATURES disabled" -#endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -81,4 +81,6 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS +#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_FORMAT) + #endif // _LIBCPP_FORMAT diff --git a/libcxx/include/ranges b/libcxx/include/ranges index 47f66fd3f622..5feaf4c322fc 100644 --- a/libcxx/include/ranges +++ b/libcxx/include/ranges @@ -160,6 +160,11 @@ namespace std::ranges { */ +// Make sure all feature tests macros are always available. +#include +// Only enable the contents of the header when libc++ was build with LIBCXX_ENABLE_INCOMPLETE_FEATURES enabled +#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + #include <__config> #include <__ranges/access.h> #include <__ranges/all.h> @@ -181,11 +186,6 @@ namespace std::ranges { #include // Required by the standard. #include // Required by the standard. #include -#include - -#if defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) -# error "The Ranges library is not supported since libc++ has been configured with LIBCXX_ENABLE_INCOMPLETE_FEATURES disabled" -#endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -204,4 +204,6 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS +#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + #endif // _LIBCPP_RANGES diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 9144347045b9..a996a815599a 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -38,6 +38,10 @@ enum ELFKind { ELF64BEKind }; +// For -Bno-symbolic, -Bsymbolic-non-weak-functions, -Bsymbolic-functions, +// -Bsymbolic. +enum class BsymbolicKind { None, NonWeakFunctions, Functions, All }; + // For --build-id. 
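// Accepted spellings are expected to include --build-id=fast, md5, sha1,
// uuid and 0x<hexstring>, with plain --build-id defaulting to the fast
// variant.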
enum class BuildIdKind { None, Fast, Md5, Sha1, Hexstring, Uuid }; @@ -144,8 +148,7 @@ struct Configuration { bool armHasMovtMovw = false; bool armJ1J2BranchEncoding = false; bool asNeeded = false; - bool bsymbolic = false; - bool bsymbolicFunctions = false; + BsymbolicKind bsymbolic = BsymbolicKind::None; bool callGraphProfileSort; bool checkSections; bool checkDynamicRelocs; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index a15959158653..91e7df21a60a 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1006,12 +1006,15 @@ static void readConfigs(opt::InputArgList &args) { OPT_no_allow_multiple_definition, false) || hasZOption(args, "muldefs"); config->auxiliaryList = args::getStrings(args, OPT_auxiliary); - if (opt::Arg *arg = args.getLastArg(OPT_Bno_symbolic, OPT_Bsymbolic_functions, - OPT_Bsymbolic)) { - if (arg->getOption().matches(OPT_Bsymbolic_functions)) - config->bsymbolicFunctions = true; + if (opt::Arg *arg = + args.getLastArg(OPT_Bno_symbolic, OPT_Bsymbolic_non_weak_functions, + OPT_Bsymbolic_functions, OPT_Bsymbolic)) { + if (arg->getOption().matches(OPT_Bsymbolic_non_weak_functions)) + config->bsymbolic = BsymbolicKind::NonWeakFunctions; + else if (arg->getOption().matches(OPT_Bsymbolic_functions)) + config->bsymbolic = BsymbolicKind::Functions; else if (arg->getOption().matches(OPT_Bsymbolic)) - config->bsymbolic = true; + config->bsymbolic = BsymbolicKind::All; } config->checkSections = args.hasFlag(OPT_check_sections, OPT_no_check_sections, true); @@ -1374,7 +1377,8 @@ static void readConfigs(opt::InputArgList &args) { // When producing an executable, --dynamic-list specifies non-local defined // symbols which are required to be exported. When producing a shared object, // symbols not specified by --dynamic-list are non-preemptible. - config->symbolic = config->bsymbolic || args.hasArg(OPT_dynamic_list); + config->symbolic = + config->bsymbolic == BsymbolicKind::All || args.hasArg(OPT_dynamic_list); for (auto *arg : args.filtered(OPT_dynamic_list)) if (Optional buffer = readFile(arg->getValue())) readDynamicList(*buffer); diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index bedcf43bbe85..f0e4c11b79eb 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -43,6 +43,9 @@ def Bsymbolic: F<"Bsymbolic">, HelpText<"Bind default visibility defined symbols def Bsymbolic_functions: F<"Bsymbolic-functions">, HelpText<"Bind default visibility defined function symbols locally for -shared">; +def Bsymbolic_non_weak_functions: F<"Bsymbolic-non-weak-functions">, + HelpText<"Bind default visibility defined STB_GLOBAL function symbols locally for -shared">; + def Bdynamic: F<"Bdynamic">, HelpText<"Link against shared libraries (default)">; def Bstatic: F<"Bstatic">, HelpText<"Do not link against shared libraries">; diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index 1039be369d9e..496be33dd182 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -368,8 +368,12 @@ bool elf::computeIsPreemptible(const Symbol &sym) { // If -Bsymbolic or --dynamic-list is specified, or -Bsymbolic-functions is // specified and the symbol is STT_FUNC, the symbol is preemptible iff it is - // in the dynamic list. - if (config->symbolic || (config->bsymbolicFunctions && sym.isFunc())) + // in the dynamic list. -Bsymbolic-non-weak-functions is a non-weak subset of + // -Bsymbolic-functions. 
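  // Illustrative effect under -shared -Bsymbolic-non-weak-functions:
  //   void f() {}                       // STB_GLOBAL func: bound locally
  //   __attribute__((weak)) void g() {} // STB_WEAK func: stays preemptible
  // except that symbols named in a --dynamic-list file remain preemptible.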
+ if (config->symbolic || + (config->bsymbolic == BsymbolicKind::Functions && sym.isFunc()) || + (config->bsymbolic == BsymbolicKind::NonWeakFunctions && sym.isFunc() && + sym.binding != STB_WEAK)) return sym.inDynamicList; return true; } diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 3496df1d2814..187b2ac90c21 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1356,7 +1356,7 @@ template void DynamicSection::finalizeContents() { // Set DT_FLAGS and DT_FLAGS_1. uint32_t dtFlags = 0; uint32_t dtFlags1 = 0; - if (config->bsymbolic) + if (config->bsymbolic == BsymbolicKind::All) dtFlags |= DF_SYMBOLIC; if (config->zGlobal) dtFlags1 |= DF_1_GLOBAL; diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 40439c995f17..a52ee4348f78 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -30,6 +30,8 @@ ELF Improvements (`D102461 `_) * A new linker script command ``OVERWRITE_SECTIONS`` has been added. (`D103303 `_) +* ``-Bsymbolic-non-weak-functions`` has been added as a ``STB_GLOBAL`` subset of ``-Bsymbolic-functions``. + (`D102570 `_) Breaking changes ---------------- diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 index ba3b0779e699..bd67e58daa4d 100644 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -85,6 +85,9 @@ flag. .It Fl Bsymbolic-functions Bind default visibility defined function symbols locally for .Fl shared. +.It Fl Bsymbolic-non-weak-functions +Bind default visibility defined STB_GLOBAL function symbols locally for +.Fl shared. .It Fl -build-id Ns = Ns Ar value Generate a build ID note. .Ar value diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 692dc4d7d4cf..a4955e2a973a 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1396,6 +1396,11 @@ public: return NVT; } + virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, + bool AllowUnknown = false) const { + return getValueType(DL, Ty, AllowUnknown); + } + /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM /// operations except for the pointer size. If AllowUnknown is true, this /// will return MVT::Other for types with no EVT counterpart (e.g. structs), diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td index 0e88e705e16b..8bacf687ac76 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -216,6 +216,7 @@ def untyped : ValueType<8, 174>; // Produces an untyped value def funcref : ValueType<0, 175>; // WebAssembly's funcref type def externref : ValueType<0, 176>; // WebAssembly's externref type def x86amx : ValueType<8192, 177>; // X86 AMX value +def i64x8 : ValueType<512, 178>; // 8 Consecutive GPRs (AArch64) def token : ValueType<0, 248>; // TokenTy diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h index 31f2d5a48183..5c73cece85c3 100644 --- a/llvm/include/llvm/Support/MachineValueType.h +++ b/llvm/include/llvm/Support/MachineValueType.h @@ -270,9 +270,10 @@ namespace llvm { funcref = 175, // WebAssembly's funcref type externref = 176, // WebAssembly's externref type x86amx = 177, // This is an X86 AMX value + i64x8 = 178, // 8 Consecutive GPRs (AArch64) FIRST_VALUETYPE = 1, // This is always the beginning of the list. - LAST_VALUETYPE = x86amx, // This always remains at the end of the list. 
+ LAST_VALUETYPE = i64x8, // This always remains at the end of the list. VALUETYPE_SIZE = LAST_VALUETYPE + 1, // This is the current maximum for LAST_VALUETYPE. @@ -987,6 +988,7 @@ namespace llvm { case nxv16f16: case nxv8f32: case nxv4f64: return TypeSize::Scalable(256); + case i64x8: case v512i1: case v64i8: case v32i16: diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index 4a8818f2e2a8..c3a609ee4fe1 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -893,9 +893,10 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, FTy.getReturnType()->isIntegerTy(32); case LibFunc_snprintf: - return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() && - FTy.getParamType(2)->isPointerTy() && - FTy.getReturnType()->isIntegerTy(32)); + return NumParams == 3 && FTy.getParamType(0)->isPointerTy() && + IsSizeTTy(FTy.getParamType(1)) && + FTy.getParamType(2)->isPointerTy() && + FTy.getReturnType()->isIntegerTy(32); case LibFunc_snprintf_chk: return NumParams == 5 && FTy.getParamType(0)->isPointerTy() && diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp index bb4d41cfd69f..4ae427484945 100644 --- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp @@ -325,7 +325,8 @@ bool InlineAsmLowering::lowerInlineAsm( return false; } - OpInfo.ConstraintVT = TLI->getValueType(DL, OpTy, true).getSimpleVT(); + OpInfo.ConstraintVT = + TLI->getAsmOperandValueType(DL, OpTy, true).getSimpleVT(); } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { assert(!Call.getType()->isVoidTy() && "Bad inline asm!"); @@ -334,13 +335,17 @@ bool InlineAsmLowering::lowerInlineAsm( TLI->getSimpleValueType(DL, STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpInfo.ConstraintVT = TLI->getSimpleValueType(DL, Call.getType()); + OpInfo.ConstraintVT = + TLI->getAsmOperandValueType(DL, Call.getType()).getSimpleVT(); } ++ResNo; } else { OpInfo.ConstraintVT = MVT::Other; } + if (OpInfo.ConstraintVT == MVT::i64x8) + return false; + // Compute the constraint code and ConstraintType to use. 
computeConstraintToUse(TLI, OpInfo); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b104e995019f..1bba7232eb14 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2439,9 +2439,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { N0.getOperand(0)); // fold (add (add (xor a, -1), b), 1) -> (sub b, a) - if (N0.getOpcode() == ISD::ADD || - N0.getOpcode() == ISD::UADDO || - N0.getOpcode() == ISD::SADDO) { + if (N0.getOpcode() == ISD::ADD) { SDValue A, Xor; if (isBitwiseNot(N0.getOperand(0))) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index d56d4bcc9169..a08548393979 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8176,7 +8176,7 @@ public: } } - return TLI.getValueType(DL, OpTy, true); + return TLI.getAsmOperandValueType(DL, OpTy, true); } }; @@ -8479,8 +8479,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, DAG.getDataLayout(), STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpInfo.ConstraintVT = - TLI.getSimpleValueType(DAG.getDataLayout(), Call.getType()); + OpInfo.ConstraintVT = TLI.getAsmOperandValueType( + DAG.getDataLayout(), Call.getType()).getSimpleVT(); } ++ResNo; } else { diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 1c1dae8f953f..5e1786958b6f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -4687,7 +4687,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL, getSimpleValueType(DL, STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpInfo.ConstraintVT = getSimpleValueType(DL, Call.getType()); + OpInfo.ConstraintVT = + getAsmOperandValueType(DL, Call.getType()).getSimpleVT(); } ++ResNo; break; diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp index 9daebfd9e63d..4876b9e23717 100644 --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -167,6 +167,7 @@ std::string EVT::getEVTString() const { case MVT::Glue: return "glue"; case MVT::x86mmx: return "x86mmx"; case MVT::x86amx: return "x86amx"; + case MVT::i64x8: return "i64x8"; case MVT::Metadata: return "Metadata"; case MVT::Untyped: return "Untyped"; case MVT::funcref: return "funcref"; @@ -198,6 +199,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::ppcf128: return Type::getPPC_FP128Ty(Context); case MVT::x86mmx: return Type::getX86_MMXTy(Context); case MVT::x86amx: return Type::getX86_AMXTy(Context); + case MVT::i64x8: return IntegerType::get(Context, 512); case MVT::externref: return PointerType::get(StructType::create(Context), 10); case MVT::funcref: diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index adefe3b37ee0..3ab9b250749a 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -653,6 +653,9 @@ bool AArch64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, case 'x': Reg = getXRegFromWReg(Reg); break; + case 't': + Reg = getXRegFromXRegTuple(Reg); + break; } O << AArch64InstPrinter::getRegisterName(Reg); @@ -749,6 +752,10 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, 
unsigned OpNum, AArch64::GPR64allRegClass.contains(Reg)) return printAsmMRegister(MO, 'x', O); + // If this is an x register tuple, print an x register. + if (AArch64::GPR64x8ClassRegClass.contains(Reg)) + return printAsmMRegister(MO, 't', O); + unsigned AltName = AArch64::NoRegAltName; const TargetRegisterClass *RegClass; if (AArch64::ZPRRegClass.contains(Reg)) { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index ae702eedcd66..ca6b87a5ebb0 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -246,6 +246,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass); addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass); + if (Subtarget->hasLS64()) { + addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass); + setOperationAction(ISD::LOAD, MVT::i64x8, Custom); + setOperationAction(ISD::STORE, MVT::i64x8, Custom); + } + if (Subtarget->hasFPARMv8()) { addRegisterClass(MVT::f16, &AArch64::FPR16RegClass); addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass); @@ -2023,6 +2029,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::LASTA) MAKE_CASE(AArch64ISD::LASTB) MAKE_CASE(AArch64ISD::REINTERPRET_CAST) + MAKE_CASE(AArch64ISD::LS64_BUILD) + MAKE_CASE(AArch64ISD::LS64_EXTRACT) MAKE_CASE(AArch64ISD::TBL) MAKE_CASE(AArch64ISD::FADD_PRED) MAKE_CASE(AArch64ISD::FADDA_PRED) @@ -4611,17 +4619,51 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op, {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()}, StoreNode->getMemoryVT(), StoreNode->getMemOperand()); return Result; + } else if (MemVT == MVT::i64x8) { + SDValue Value = StoreNode->getValue(); + assert(Value->getValueType(0) == MVT::i64x8); + SDValue Chain = StoreNode->getChain(); + SDValue Base = StoreNode->getBasePtr(); + EVT PtrVT = Base.getValueType(); + for (unsigned i = 0; i < 8; i++) { + SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64, + Value, DAG.getConstant(i, Dl, MVT::i32)); + SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base, + DAG.getConstant(i * 8, Dl, PtrVT)); + Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(), + StoreNode->getOriginalAlign()); + } + return Chain; } return SDValue(); } -// Custom lowering for extending v4i8 vector loads. SDValue AArch64TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); LoadSDNode *LoadNode = cast(Op); assert(LoadNode && "Expected custom lowering of a load node"); + + if (LoadNode->getMemoryVT() == MVT::i64x8) { + SmallVector Ops; + SDValue Base = LoadNode->getBasePtr(); + SDValue Chain = LoadNode->getChain(); + EVT PtrVT = Base.getValueType(); + for (unsigned i = 0; i < 8; i++) { + SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base, + DAG.getConstant(i * 8, DL, PtrVT)); + SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr, + LoadNode->getPointerInfo(), + LoadNode->getOriginalAlign()); + Ops.push_back(Part); + Chain = SDValue(Part.getNode(), 1); + } + SDValue Loaded = DAG.getNode(AArch64ISD::LS64_BUILD, DL, MVT::i64x8, Ops); + return DAG.getMergeValues({Loaded, Chain}, DL); + } + + // Custom lowering for extending v4i8 vector loads. 
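  // A sketch of the case handled here:
  //   %v = load <4 x i8>, <4 x i8>* %p
  //   %e = zext <4 x i8> %v to <4 x i16>
  // The 32-bit load is done as a single scalar load and then widened in a
  // vector register rather than split into four separate byte loads.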
EVT VT = Op->getValueType(0); assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32"); @@ -8179,6 +8221,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( case 'r': if (VT.isScalableVector()) return std::make_pair(0U, nullptr); + if (Subtarget->hasLS64() && VT.getSizeInBits() == 512) + return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass); if (VT.getFixedSizeInBits() == 64) return std::make_pair(0U, &AArch64::GPR64commonRegClass); return std::make_pair(0U, &AArch64::GPR32commonRegClass); @@ -8266,6 +8310,15 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( return Res; } +EVT AArch64TargetLowering::getAsmOperandValueType(const DataLayout &DL, + llvm::Type *Ty, + bool AllowUnknown) const { + if (Subtarget->hasLS64() && Ty->isIntegerTy(512)) + return EVT(MVT::i64x8); + + return TargetLowering::getAsmOperandValueType(DL, Ty, AllowUnknown); +} + /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. void AArch64TargetLowering::LowerAsmOperandForConstraint( diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 386e1c2d8400..2b337255fc27 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -330,6 +330,10 @@ enum NodeType : unsigned { // Cast between vectors of the same element type but differ in length. REINTERPRET_CAST, + // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa + LS64_BUILD, + LS64_EXTRACT, + LD1_MERGE_ZERO, LD1S_MERGE_ZERO, LDNF1_MERGE_ZERO, @@ -824,6 +828,9 @@ public: bool isAllActivePredicate(SDValue N) const; EVT getPromotedVTForPredicate(EVT VT) const; + EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, + bool AllowUnknown = false) const override; + private: /// Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 682cec361728..12744e4de09b 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -8092,6 +8092,20 @@ let AddedComplexity = 10 in { // FIXME: add SVE dot-product patterns. } +// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs, +// so that it can be used as input to inline asm, and vice versa. 
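// For illustration, the eventual source-level use is an inline-asm operand
// spanning eight consecutive GPRs (a sketch, assuming an ACLE-style 512-bit
// type such as data512_t):
//   data512_t v = ...;
//   asm volatile("st64b %0, [%1]" : : "r"(v), "r"(dst) : "memory");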
+def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>; +def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>; +def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, + GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)), + (REG_SEQUENCE GPR64x8Class, + $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, + $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>; +foreach i = 0-7 in { + def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))), + (EXTRACT_SUBREG $val, !cast("x8sub_"#i))>; +} + let Predicates = [HasLS64] in { def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn), (outs GPR64x8:$Rt)>; diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index 07dee3ce1fbc..67680e356683 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -732,7 +732,9 @@ def Tuples8X : RegisterTuples< !foreach(i, [0,1,2,3,4,5,6,7], !cast("x8sub_"#i)), !foreach(i, [0,1,2,3,4,5,6,7], (trunc (decimate (rotl GPR64, i), 2), 12))>; -def GPR64x8Class : RegisterClass<"AArch64", [i64], 64, (trunc Tuples8X, 12)>; +def GPR64x8Class : RegisterClass<"AArch64", [i64x8], 512, (trunc Tuples8X, 12)> { + let Size = 512; +} def GPR64x8AsmOp : AsmOperandClass { let Name = "GPR64x8"; let ParserMethod = "tryParseGPR64x8"; diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h index ce6866154242..d168c2a84bbe 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -106,6 +106,25 @@ inline static unsigned getXRegFromWReg(unsigned Reg) { return Reg; } +inline static unsigned getXRegFromXRegTuple(unsigned RegTuple) { + switch (RegTuple) { + case AArch64::X0_X1_X2_X3_X4_X5_X6_X7: return AArch64::X0; + case AArch64::X2_X3_X4_X5_X6_X7_X8_X9: return AArch64::X2; + case AArch64::X4_X5_X6_X7_X8_X9_X10_X11: return AArch64::X4; + case AArch64::X6_X7_X8_X9_X10_X11_X12_X13: return AArch64::X6; + case AArch64::X8_X9_X10_X11_X12_X13_X14_X15: return AArch64::X8; + case AArch64::X10_X11_X12_X13_X14_X15_X16_X17: return AArch64::X10; + case AArch64::X12_X13_X14_X15_X16_X17_X18_X19: return AArch64::X12; + case AArch64::X14_X15_X16_X17_X18_X19_X20_X21: return AArch64::X14; + case AArch64::X16_X17_X18_X19_X20_X21_X22_X23: return AArch64::X16; + case AArch64::X18_X19_X20_X21_X22_X23_X24_X25: return AArch64::X18; + case AArch64::X20_X21_X22_X23_X24_X25_X26_X27: return AArch64::X20; + case AArch64::X22_X23_X24_X25_X26_X27_X28_FP: return AArch64::X22; + } + // For anything else, return it unchanged. + return RegTuple; +} + static inline unsigned getBRegFromDReg(unsigned Reg) { switch (Reg) { case AArch64::D0: return AArch64::B0; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index d37ed584d9d2..294532011650 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -5814,6 +5814,13 @@ static SDValue performANY_EXTENDCombine(SDNode *N, break; } + // Only handle cases where the result is used by a CopyToReg that likely + // means the value is a liveout of the basic block. This helps prevent + // infinite combine loops like PR51206. 
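  // i.e. (any_extend X) is promoted here only when the result is copied out
  // of the block via CopyToReg; purely in-block uses are left alone so this
  // combine cannot keep re-firing against itself.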
+ if (none_of(N->uses(), + [](SDNode *User) { return User->getOpcode() == ISD::CopyToReg; })) + return SDValue(); + SmallVector SetCCs; for (SDNode::use_iterator UI = Src.getNode()->use_begin(), UE = Src.getNode()->use_end(); diff --git a/llvm/lib/Transforms/Scalar/DivRemPairs.cpp b/llvm/lib/Transforms/Scalar/DivRemPairs.cpp index c77769368ede..66c9d9f0902a 100644 --- a/llvm/lib/Transforms/Scalar/DivRemPairs.cpp +++ b/llvm/lib/Transforms/Scalar/DivRemPairs.cpp @@ -272,9 +272,10 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, if (PredBB && IsSafeToHoist(RemInst, RemBB) && IsSafeToHoist(DivInst, DivBB) && - llvm::all_of(successors(PredBB), [&](BasicBlock *BB) { - return BB == DivBB || BB == RemBB; - })) { + all_of(successors(PredBB), + [&](BasicBlock *BB) { return BB == DivBB || BB == RemBB; }) && + all_of(predecessors(DivBB), + [&](BasicBlock *BB) { return BB == RemBB || BB == PredBB; })) { DivDominates = true; DivInst->moveBefore(PredBB->getTerminator()); Changed = true; diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp index 7311819f77ff..137f99078faf 100644 --- a/llvm/utils/TableGen/CodeGenTarget.cpp +++ b/llvm/utils/TableGen/CodeGenTarget.cpp @@ -77,6 +77,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) { case MVT::ppcf128: return "MVT::ppcf128"; case MVT::x86mmx: return "MVT::x86mmx"; case MVT::x86amx: return "MVT::x86amx"; + case MVT::i64x8: return "MVT::i64x8"; case MVT::Glue: return "MVT::Glue"; case MVT::isVoid: return "MVT::isVoid"; case MVT::v1i1: return "MVT::v1i1"; From d545c2ce5ad1891282e8818b47ffe557c76a86b4 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sat, 21 Aug 2021 23:27:36 +0200 Subject: [PATCH 3/3] Vendor import of llvm-project branch release/13.x llvmorg-13.0.0-rc1-97-g23ba3732246a. 
--- clang/include/clang/Basic/BuiltinsAArch64.def | 3 + clang/include/clang/Basic/LangOptions.h | 6 + clang/include/clang/Driver/Options.td | 6 +- clang/include/clang/Lex/PreprocessorOptions.h | 3 - clang/include/clang/Sema/Sema.h | 3 +- clang/lib/AST/ASTContext.cpp | 18 +- clang/lib/AST/Expr.cpp | 7 +- clang/lib/Basic/LangOptions.cpp | 8 + clang/lib/CodeGen/CGBuiltin.cpp | 23 + clang/lib/CodeGen/CGDeclCXX.cpp | 18 +- clang/lib/CodeGen/CodeGenModule.cpp | 2 +- clang/lib/Driver/ToolChains/Clang.cpp | 38 +- clang/lib/Driver/ToolChains/Hexagon.cpp | 32 +- clang/lib/Driver/ToolChains/MinGW.cpp | 7 +- clang/lib/Frontend/CompilerInvocation.cpp | 18 +- clang/lib/Headers/intrin.h | 3 + clang/lib/Lex/PPMacroExpansion.cpp | 11 +- clang/lib/Sema/SemaConcept.cpp | 17 +- clang/lib/Sema/SemaDeclCXX.cpp | 5 +- clang/lib/Sema/SemaTemplate.cpp | 9 +- clang/lib/Sema/SemaTemplateInstantiate.cpp | 19 +- compiler-rt/include/profile/InstrProfData.inc | 4 +- .../lib/profile/InstrProfilingBuffer.c | 2 +- compiler-rt/lib/profile/InstrProfilingMerge.c | 11 +- .../lib/profile/InstrProfilingPlatformLinux.c | 19 + libcxx/include/__config | 11 + libcxx/include/ctime | 2 +- libcxx/include/ios | 7 + lld/ELF/Config.h | 3 +- lld/ELF/Driver.cpp | 55 +- lld/ELF/LinkerScript.cpp | 36 +- lld/ELF/LinkerScript.h | 2 +- lld/ELF/Relocations.cpp | 7 + lld/ELF/ScriptParser.cpp | 9 +- lld/ELF/SymbolTable.cpp | 118 ++- lld/ELF/SymbolTable.h | 10 +- lld/ELF/Symbols.cpp | 3 + lld/docs/ReleaseNotes.rst | 144 ++- .../Commands/CommandObjectMemoryTag.cpp | 182 +++- lldb/source/Commands/Options.td | 8 + .../GDBRemoteCommunicationServerLLGS.cpp | 28 +- lldb/source/Symbol/TypeSystem.cpp | 90 +- llvm/include/llvm/Analysis/ValueTracking.h | 4 + llvm/include/llvm/IR/Module.h | 3 + llvm/include/llvm/ProfileData/InstrProf.h | 1 + .../llvm/ProfileData/InstrProfData.inc | 4 +- llvm/include/llvm/Transforms/IPO/Attributor.h | 20 + .../llvm/Transforms/Utils/PredicateInfo.h | 6 +- .../Utils/ScalarEvolutionExpander.h | 5 + llvm/lib/Analysis/InstructionSimplify.cpp | 16 + llvm/lib/Analysis/ValueTracking.cpp | 10 + llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 +- .../CodeGen/TargetLoweringObjectFileImpl.cpp | 11 +- llvm/lib/IR/ConstantFold.cpp | 241 ----- llvm/lib/IR/Module.cpp | 4 + llvm/lib/ProfileData/InstrProfReader.cpp | 2 +- .../Target/AArch64/AArch64ISelLowering.cpp | 18 +- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 34 +- llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 2 +- llvm/lib/Target/BPF/BPFTargetTransformInfo.h | 18 + .../Target/PowerPC/PPCTargetTransformInfo.cpp | 3 + llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 989 +++++++++++++----- llvm/lib/Target/RISCV/RISCVSchedRocket.td | 3 + llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 3 + llvm/lib/Target/RISCV/RISCVSchedule.td | 1 + llvm/lib/Target/RISCV/RISCVScheduleV.td | 820 +++++++++++++++ llvm/lib/Target/X86/X86ISelLowering.cpp | 14 +- llvm/lib/Target/X86/X86InstrArithmetic.td | 28 +- llvm/lib/Transforms/IPO/Attributor.cpp | 117 ++- .../Transforms/IPO/AttributorAttributes.cpp | 54 +- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 16 +- .../InstCombine/InstCombineCompares.cpp | 123 ++- .../InstCombineLoadStoreAlloca.cpp | 8 +- .../InstCombine/InstCombineSelect.cpp | 3 +- .../Transforms/Scalar/LoopStrengthReduce.cpp | 492 +++++++-- llvm/lib/Transforms/Scalar/SROA.cpp | 5 +- llvm/lib/Transforms/Utils/PredicateInfo.cpp | 46 + .../Utils/ScalarEvolutionExpander.cpp | 5 +- .../Transforms/Vectorize/LoopVectorize.cpp | 46 + llvm/tools/llvm-mca/Views/TimelineView.cpp | 15 +- 
openmp/runtime/src/kmp_taskdeps.cpp | 27 +- openmp/runtime/src/kmp_taskdeps.h | 3 +- openmp/runtime/src/kmp_tasking.cpp | 1 + 83 files changed, 3225 insertions(+), 1009 deletions(-) create mode 100644 llvm/lib/Target/RISCV/RISCVScheduleV.td diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index 1dac5d2371d4..634bcaed20a6 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -243,6 +243,9 @@ TARGET_HEADER_BUILTIN(_ReadStatusReg, "LLii", "nh", "intrin.h", ALL_MS_LANGUAG TARGET_HEADER_BUILTIN(_WriteStatusReg, "viLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__mulh, "SLLiSLLiSLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(__umulh, "ULLiULLiULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") + #undef BUILTIN #undef LANGBUILTIN #undef TARGET_HEADER_BUILTIN diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 71cf0c65e692..b60b94a1ba08 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -354,6 +354,9 @@ public: /// A list of all -fno-builtin-* function names (e.g., memset). std::vector NoBuiltinFuncs; + /// A prefix map for __FILE__, __BASE_FILE__ and __builtin_FILE(). + std::map> MacroPrefixMap; + /// Triples of the OpenMP targets that the host code codegen should /// take into account in order to generate accurate offloading descriptors. std::vector OMPTargetTriples; @@ -460,6 +463,9 @@ public: } bool isSYCL() const { return SYCLIsDevice || SYCLIsHost; } + + /// Remap path prefix according to -fmacro-prefix-path option. + void remapPathPrefix(SmallString<256> &Path) const; }; /// Floating point control options diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index ab1a5487d9c0..a0cbcae0bdc3 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2825,10 +2825,10 @@ def fcoverage_prefix_map_EQ HelpText<"remap file source paths in coverage mapping">; def ffile_prefix_map_EQ : Joined<["-"], "ffile-prefix-map=">, Group, - HelpText<"remap file source paths in debug info and predefined preprocessor macros">; + HelpText<"remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE()">; def fmacro_prefix_map_EQ - : Joined<["-"], "fmacro-prefix-map=">, Group, Flags<[CC1Option]>, - HelpText<"remap file source paths in predefined preprocessor macros">; + : Joined<["-"], "fmacro-prefix-map=">, Group, Flags<[CC1Option]>, + HelpText<"remap file source paths in predefined preprocessor macros and __builtin_FILE()">; defm force_dwarf_frame : BoolFOption<"force-dwarf-frame", CodeGenOpts<"ForceDwarfFrameSection">, DefaultFalse, PosFlag, NegFlag>; diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h index 99085b98fc7a..a7aabc3e1df2 100644 --- a/clang/include/clang/Lex/PreprocessorOptions.h +++ b/clang/include/clang/Lex/PreprocessorOptions.h @@ -199,9 +199,6 @@ public: /// build it again. std::shared_ptr FailedModules; - /// A prefix map for __FILE__ and __BASE_FILE__. - std::map> MacroPrefixMap; - /// Contains the currently active skipped range mappings for skipping excluded /// conditional directives. 
/// diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 83a2d132bf6a..d8b2546b81a3 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -7828,8 +7828,7 @@ public: TemplateArgumentLoc &Arg, SmallVectorImpl &Converted); - bool CheckTemplateArgument(TemplateTypeParmDecl *Param, - TypeSourceInfo *Arg); + bool CheckTemplateArgument(TypeSourceInfo *Arg); ExprResult CheckTemplateArgument(NonTypeTemplateParmDecl *Param, QualType InstantiatedParamType, Expr *Arg, TemplateArgument &Converted, diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index e102a3ba508d..fdba204fbe7f 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -6066,9 +6066,11 @@ ASTContext::getCanonicalNestedNameSpecifier(NestedNameSpecifier *NNS) const { NNS->getAsNamespaceAlias()->getNamespace() ->getOriginalNamespace()); + // The difference between TypeSpec and TypeSpecWithTemplate is that the + // latter will have the 'template' keyword when printed. case NestedNameSpecifier::TypeSpec: case NestedNameSpecifier::TypeSpecWithTemplate: { - QualType T = getCanonicalType(QualType(NNS->getAsType(), 0)); + const Type *T = getCanonicalType(NNS->getAsType()); // If we have some kind of dependent-named type (e.g., "typename T::type"), // break it apart into its prefix and identifier, then reconsititute those @@ -6078,14 +6080,16 @@ ASTContext::getCanonicalNestedNameSpecifier(NestedNameSpecifier *NNS) const { // typedef typename T::type T1; // typedef typename T1::type T2; if (const auto *DNT = T->getAs()) - return NestedNameSpecifier::Create(*this, DNT->getQualifier(), - const_cast(DNT->getIdentifier())); + return NestedNameSpecifier::Create( + *this, DNT->getQualifier(), + const_cast(DNT->getIdentifier())); + if (const auto *DTST = T->getAs()) + return NestedNameSpecifier::Create(*this, DTST->getQualifier(), true, + const_cast(T)); - // Otherwise, just canonicalize the type, and force it to be a TypeSpec. - // FIXME: Why are TypeSpec and TypeSpecWithTemplate distinct in the - // first place? + // TODO: Set 'Template' parameter to true for other template types. 
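For the TypeSpec/TypeSpecWithTemplate distinction noted above, a minimal illustration (invented names, not from this patch): a nested-name-specifier needs the 'template' keyword exactly when it names a member template of a dependent type, so the keyword cannot be dropped when the canonical form is printed.

    template <typename T> struct Outer {
      template <typename U> struct Inner {};
    };
    // "Outer<T>::template Inner<int>::" is a TypeSpecWithTemplate; without
    // the 'template' keyword the declaration below would not parse.
    template <typename T>
    void use(typename Outer<T>::template Inner<int>) {}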
return NestedNameSpecifier::Create(*this, nullptr, false, - const_cast(T.getTypePtr())); + const_cast(T)); } case NestedNameSpecifier::Global: diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index e8b4aaa2b81e..11f10d4695fc 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -2233,8 +2233,11 @@ APValue SourceLocExpr::EvaluateInContext(const ASTContext &Ctx, }; switch (getIdentKind()) { - case SourceLocExpr::File: - return MakeStringLiteral(PLoc.getFilename()); + case SourceLocExpr::File: { + SmallString<256> Path(PLoc.getFilename()); + Ctx.getLangOpts().remapPathPrefix(Path); + return MakeStringLiteral(Path); + } case SourceLocExpr::Function: { const Decl *CurDecl = dyn_cast_or_null(Context); return MakeStringLiteral( diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp index dc392d5352aa..bebf3178426f 100644 --- a/clang/lib/Basic/LangOptions.cpp +++ b/clang/lib/Basic/LangOptions.cpp @@ -11,6 +11,8 @@ //===----------------------------------------------------------------------===// #include "clang/Basic/LangOptions.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/Path.h" using namespace clang; @@ -48,6 +50,12 @@ VersionTuple LangOptions::getOpenCLVersionTuple() const { return VersionTuple(Ver / 100, (Ver % 100) / 10); } +void LangOptions::remapPathPrefix(SmallString<256> &Path) const { + for (const auto &Entry : MacroPrefixMap) + if (llvm::sys::path::replace_path_prefix(Path, Entry.first, Entry.second)) + break; +} + FPOptions FPOptions::defaultWithoutTrailingStorage(const LangOptions &LO) { FPOptions result(LO); return result; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index d9b2a5fe16be..1a02965b223e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9732,6 +9732,29 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F); } + if (BuiltinID == AArch64::BI__mulh || BuiltinID == AArch64::BI__umulh) { + llvm::Type *ResType = ConvertType(E->getType()); + llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); + + bool IsSigned = BuiltinID == AArch64::BI__mulh; + Value *LHS = + Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned); + Value *RHS = + Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned); + + Value *MulResult, *HigherBits; + if (IsSigned) { + MulResult = Builder.CreateNSWMul(LHS, RHS); + HigherBits = Builder.CreateAShr(MulResult, 64); + } else { + MulResult = Builder.CreateNUWMul(LHS, RHS); + HigherBits = Builder.CreateLShr(MulResult, 64); + } + HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); + + return HigherBits; + } + // Handle MSVC intrinsics before argument evaluation to prevent double // evaluation. 
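For reference, the new __mulh/__umulh lowering above widens both operands to 128 bits, multiplies, and returns the high half. The same semantics in plain C++, assuming a compiler with the __int128 extension (a sketch, not part of this patch):

    #include <cstdint>

    // High 64 bits of the signed 64x64 -> 128-bit product, like __mulh.
    int64_t mulh_ref(int64_t a, int64_t b) {
      return (int64_t)(((__int128)a * b) >> 64);
    }
    // Unsigned counterpart, like __umulh.
    uint64_t umulh_ref(uint64_t a, uint64_t b) {
      return (uint64_t)(((unsigned __int128)a * b) >> 64);
    }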
if (Optional MsvcIntId = translateAarch64ToMsvcIntrin(BuiltinID)) diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index d43fb99550a8..553fedebfe56 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -555,7 +555,8 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, PrioritizedCXXGlobalInits.size()); PrioritizedCXXGlobalInits.push_back(std::make_pair(Key, Fn)); } else if (isTemplateInstantiation(D->getTemplateSpecializationKind()) || - getContext().GetGVALinkageForVariable(D) == GVA_DiscardableODR) { + getContext().GetGVALinkageForVariable(D) == GVA_DiscardableODR || + D->hasAttr()) { // C++ [basic.start.init]p2: // Definitions of explicitly specialized class template static data // members have ordered initialization. Other class template static data @@ -568,17 +569,18 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, // group with the global being initialized. On most platforms, this is a // minor startup time optimization. In the MS C++ ABI, there are no guard // variables, so this COMDAT key is required for correctness. - AddGlobalCtor(Fn, 65535, COMDATKey); - if (getTarget().getCXXABI().isMicrosoft() && COMDATKey) { - // In The MS C++, MS add template static data member in the linker - // drective. - addUsedGlobal(COMDATKey); - } - } else if (D->hasAttr()) { + // // SelectAny globals will be comdat-folded. Put the initializer into a // COMDAT group associated with the global, so the initializers get folded // too. + AddGlobalCtor(Fn, 65535, COMDATKey); + if (COMDATKey && (getTriple().isOSBinFormatELF() || + getTarget().getCXXABI().isMicrosoft())) { + // When COMDAT is used on ELF or in the MS C++ ABI, the key must be in + // llvm.used to prevent linker GC. + addUsedGlobal(COMDATKey); + } } else { I = DelayedCXXInitPosition.find(D); // Re-do lookup in case of re-hash. if (I == DelayedCXXInitPosition.end()) { diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 9b40b88ea3c9..49a1396b58e3 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -186,7 +186,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, !getModule().getSourceFileName().empty()) { std::string Path = getModule().getSourceFileName(); // Check if a path substitution is needed from the MacroPrefixMap. 
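The same -fmacro-prefix-map=OLD=NEW entries consulted here also drive the __FILE__ and __builtin_FILE() remapping added elsewhere in this series. A quick way to observe the effect, with made-up paths:

    // Compile as: clang++ -fmacro-prefix-map=/home/user/project/=. demo.cpp
    #include <cstdio>
    int main() {
      std::printf("%s\n", __FILE__);          // remapped before this series too
      std::printf("%s\n", __builtin_FILE());  // remapped as of this series
    }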
- for (const auto &Entry : PPO.MacroPrefixMap) + for (const auto &Entry : LangOpts.MacroPrefixMap) if (Path.rfind(Entry.first, 0) != std::string::npos) { Path = Entry.second + Path.substr(Entry.first.size()); break; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 1870bd81789c..4c8ba8cdcd29 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2637,7 +2637,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, llvm::DenormalMode DenormalFPMath = DefaultDenormalFPMath; llvm::DenormalMode DenormalFP32Math = DefaultDenormalFP32Math; - StringRef FPContract = "on"; + StringRef FPContract = ""; bool StrictFPModel = false; @@ -2662,7 +2662,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, ReciprocalMath = false; SignedZeros = true; // -fno_fast_math restores default denormal and fpcontract handling - FPContract = "on"; + FPContract = ""; DenormalFPMath = llvm::DenormalMode::getIEEE(); // FIXME: The target may have picked a non-IEEE default mode here based on @@ -2682,18 +2682,20 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, // ffp-model= is a Driver option, it is entirely rewritten into more // granular options before being passed into cc1. // Use the gcc option in the switch below. - if (!FPModel.empty() && !FPModel.equals(Val)) + if (!FPModel.empty() && !FPModel.equals(Val)) { D.Diag(clang::diag::warn_drv_overriding_flag_option) << Args.MakeArgString("-ffp-model=" + FPModel) << Args.MakeArgString("-ffp-model=" + Val); + FPContract = ""; + } if (Val.equals("fast")) { optID = options::OPT_ffast_math; FPModel = Val; - FPContract = Val; + FPContract = "fast"; } else if (Val.equals("precise")) { optID = options::OPT_ffp_contract; FPModel = Val; - FPContract = "on"; + FPContract = "fast"; PreciseFPModel = true; } else if (Val.equals("strict")) { StrictFPModel = true; @@ -2779,11 +2781,9 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, case options::OPT_ffp_contract: { StringRef Val = A->getValue(); if (PreciseFPModel) { - // When -ffp-model=precise is seen on the command line, - // the boolean PreciseFPModel is set to true which indicates - // "the current option is actually PreciseFPModel". The optID - // is changed to OPT_ffp_contract and FPContract is set to "on". - // the argument Val string is "precise": it shouldn't be checked. + // -ffp-model=precise enables ffp-contract=fast as a side effect + // the FPContract value has already been set to a string literal + // and the Val string isn't a pertinent value. ; } else if (Val.equals("fast") || Val.equals("on") || Val.equals("off")) FPContract = Val; @@ -2881,17 +2881,18 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, // -fno_fast_math restores default denormal and fpcontract handling DenormalFPMath = DefaultDenormalFPMath; DenormalFP32Math = llvm::DenormalMode::getIEEE(); - FPContract = "on"; + FPContract = ""; break; } if (StrictFPModel) { // If -ffp-model=strict has been specified on command line but // subsequent options conflict then emit warning diagnostic. 
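What FPContract ultimately controls is whether the backend may contract a multiply and an add into one fused operation. An illustrative function showing the affected pattern:

    // With -ffp-contract=fast (what -ffp-model=precise now implies), a*b+c
    // may become a single fma, skipping the intermediate rounding of a*b;
    // with -ffp-contract=off the product must round separately first.
    double madd(double a, double b, double c) { return a * b + c; }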
- if (HonorINFs && HonorNaNs && !AssociativeMath && !ReciprocalMath && - SignedZeros && TrappingMath && RoundingFPMath && - DenormalFPMath == llvm::DenormalMode::getIEEE() && - DenormalFP32Math == llvm::DenormalMode::getIEEE() && - FPContract.equals("off")) + if (HonorINFs && HonorNaNs && + !AssociativeMath && !ReciprocalMath && + SignedZeros && TrappingMath && RoundingFPMath && + (FPContract.equals("off") || FPContract.empty()) && + DenormalFPMath == llvm::DenormalMode::getIEEE() && + DenormalFP32Math == llvm::DenormalMode::getIEEE()) // OK: Current Arg doesn't conflict with -ffp-model=strict ; else { @@ -7690,8 +7691,11 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, assert(CurTC == nullptr && "Expected one dependence!"); CurTC = TC; }); + UB += C.addTempFile( + C.getArgs().MakeArgString(CurTC->getInputFilename(Inputs[I]))); + } else { + UB += CurTC->getInputFilename(Inputs[I]); } - UB += CurTC->getInputFilename(Inputs[I]); } CmdArgs.push_back(TCArgs.MakeArgString(UB)); diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index 828bfdbb05a3..314d0efce441 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -588,21 +588,43 @@ void HexagonToolChain::addClangTargetOptions(const ArgList &DriverArgs, void HexagonToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { - if (DriverArgs.hasArg(options::OPT_nostdinc) || - DriverArgs.hasArg(options::OPT_nostdlibinc)) + if (DriverArgs.hasArg(options::OPT_nostdinc)) return; + const bool IsELF = !getTriple().isMusl() && !getTriple().isOSLinux(); + const bool IsLinuxMusl = getTriple().isMusl() && getTriple().isOSLinux(); + const Driver &D = getDriver(); - if (!D.SysRoot.empty()) { + SmallString<128> ResourceDirInclude(D.ResourceDir); + if (!IsELF) { + llvm::sys::path::append(ResourceDirInclude, "include"); + if (!DriverArgs.hasArg(options::OPT_nobuiltininc) && + (!IsLinuxMusl || DriverArgs.hasArg(options::OPT_nostdlibinc))) + addSystemInclude(DriverArgs, CC1Args, ResourceDirInclude); + } + if (DriverArgs.hasArg(options::OPT_nostdlibinc)) + return; + + const bool HasSysRoot = !D.SysRoot.empty(); + if (HasSysRoot) { SmallString<128> P(D.SysRoot); - if (getTriple().isMusl()) + if (IsLinuxMusl) llvm::sys::path::append(P, "usr/include"); else llvm::sys::path::append(P, "include"); + addExternCSystemInclude(DriverArgs, CC1Args, P.str()); - return; + // LOCAL_INCLUDE_DIR + addSystemInclude(DriverArgs, CC1Args, P + "/usr/local/include"); + // TOOL_INCLUDE_DIR + AddMultilibIncludeArgs(DriverArgs, CC1Args); } + if (!DriverArgs.hasArg(options::OPT_nobuiltininc) && IsLinuxMusl) + addSystemInclude(DriverArgs, CC1Args, ResourceDirInclude); + + if (HasSysRoot) + return; std::string TargetDir = getHexagonTargetDir(D.getInstalledDir(), D.PrefixDirs); addExternCSystemInclude(DriverArgs, CC1Args, TargetDir + "/hexagon/include"); diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp index 20efbdc237a8..7ba729f36bd8 100644 --- a/clang/lib/Driver/ToolChains/MinGW.cpp +++ b/clang/lib/Driver/ToolChains/MinGW.cpp @@ -136,10 +136,13 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA, llvm_unreachable("Unsupported target architecture."); } - if (Args.hasArg(options::OPT_mwindows)) { + Arg *SubsysArg = + Args.getLastArg(options::OPT_mwindows, options::OPT_mconsole); + if (SubsysArg && SubsysArg->getOption().matches(options::OPT_mwindows)) { 
CmdArgs.push_back("--subsystem"); CmdArgs.push_back("windows"); - } else if (Args.hasArg(options::OPT_mconsole)) { + } else if (SubsysArg && + SubsysArg->getOption().matches(options::OPT_mconsole)) { CmdArgs.push_back("--subsystem"); CmdArgs.push_back("console"); } diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 33e5f3e99c45..7025028bc94a 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3528,6 +3528,9 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts, GenerateArg(Args, OPT_fexperimental_relative_cxx_abi_vtables, SA); else GenerateArg(Args, OPT_fno_experimental_relative_cxx_abi_vtables, SA); + + for (const auto &MP : Opts.MacroPrefixMap) + GenerateArg(Args, OPT_fmacro_prefix_map_EQ, MP.first + "=" + MP.second, SA); } bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, @@ -4037,6 +4040,12 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, options::OPT_fno_experimental_relative_cxx_abi_vtables, TargetCXXABI::usesRelativeVTables(T)); + for (const auto &A : Args.getAllArgValues(OPT_fmacro_prefix_map_EQ)) { + auto Split = StringRef(A).split('='); + Opts.MacroPrefixMap.insert( + {std::string(Split.first), std::string(Split.second)}); + } + return Diags.getNumErrors() == NumErrorsBefore; } @@ -4109,9 +4118,6 @@ static void GeneratePreprocessorArgs(PreprocessorOptions &Opts, for (const auto &D : Opts.DeserializedPCHDeclsToErrorOn) GenerateArg(Args, OPT_error_on_deserialized_pch_decl, D, SA); - for (const auto &MP : Opts.MacroPrefixMap) - GenerateArg(Args, OPT_fmacro_prefix_map_EQ, MP.first + "=" + MP.second, SA); - if (Opts.PrecompiledPreambleBytes != std::make_pair(0u, false)) GenerateArg(Args, OPT_preamble_bytes_EQ, Twine(Opts.PrecompiledPreambleBytes.first) + "," + @@ -4180,12 +4186,6 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args, for (const auto *A : Args.filtered(OPT_error_on_deserialized_pch_decl)) Opts.DeserializedPCHDeclsToErrorOn.insert(A->getValue()); - for (const auto &A : Args.getAllArgValues(OPT_fmacro_prefix_map_EQ)) { - auto Split = StringRef(A).split('='); - Opts.MacroPrefixMap.insert( - {std::string(Split.first), std::string(Split.second)}); - } - if (const Arg *A = Args.getLastArg(OPT_preamble_bytes_EQ)) { StringRef Value(A->getValue()); size_t Comma = Value.find(','); diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h index ff8eb8fca268..34ec79d6acbc 100644 --- a/clang/lib/Headers/intrin.h +++ b/clang/lib/Headers/intrin.h @@ -574,6 +574,9 @@ void _WriteStatusReg(int, __int64); unsigned short __cdecl _byteswap_ushort(unsigned short val); unsigned long __cdecl _byteswap_ulong (unsigned long val); unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64 val); + +__int64 __mulh(__int64 __a, __int64 __b); +unsigned __int64 __umulh(unsigned __int64 __a, unsigned __int64 __b); #endif /*----------------------------------------------------------------------------*\ diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 8728ac9e2166..d8ad9d845e7a 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -1453,15 +1453,6 @@ static bool isTargetEnvironment(const TargetInfo &TI, return TI.getTriple().getEnvironment() == Env.getEnvironment(); } -static void remapMacroPath( - SmallString<256> &Path, - const std::map> - &MacroPrefixMap) { - for (const auto &Entry : MacroPrefixMap) - if 
(llvm::sys::path::replace_path_prefix(Path, Entry.first, Entry.second)) - break; -} - /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded /// as a builtin macro, handle it and return the next token as 'Tok'. void Preprocessor::ExpandBuiltinMacro(Token &Tok) { @@ -1543,7 +1534,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { } else { FN += PLoc.getFilename(); } - remapMacroPath(FN, PPOpts->MacroPrefixMap); + getLangOpts().remapPathPrefix(FN); Lexer::Stringify(FN); OS << '"' << FN << '"'; } diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index f2c70d0a56ef..931c9e3e2738 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -742,22 +742,15 @@ Optional NormalizedConstraint::fromConstraintExprs(Sema &S, NamedDecl *D, ArrayRef E) { assert(E.size() != 0); - auto First = fromConstraintExpr(S, D, E[0]); - if (E.size() == 1) - return First; - auto Second = fromConstraintExpr(S, D, E[1]); - if (!Second) + auto Conjunction = fromConstraintExpr(S, D, E[0]); + if (!Conjunction) return None; - llvm::Optional Conjunction; - Conjunction.emplace(S.Context, std::move(*First), std::move(*Second), - CCK_Conjunction); - for (unsigned I = 2; I < E.size(); ++I) { + for (unsigned I = 1; I < E.size(); ++I) { auto Next = fromConstraintExpr(S, D, E[I]); if (!Next) - return llvm::Optional{}; - NormalizedConstraint NewConjunction(S.Context, std::move(*Conjunction), + return None; + *Conjunction = NormalizedConstraint(S.Context, std::move(*Conjunction), std::move(*Next), CCK_Conjunction); - *Conjunction = std::move(NewConjunction); } return Conjunction; } diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 83c97626ff7e..da4f4f862095 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -12472,6 +12472,8 @@ bool Sema::CheckUsingDeclRedeclaration(SourceLocation UsingLoc, return false; } + const NestedNameSpecifier *CNNS = + Context.getCanonicalNestedNameSpecifier(Qual); for (LookupResult::iterator I = Prev.begin(), E = Prev.end(); I != E; ++I) { NamedDecl *D = *I; @@ -12497,8 +12499,7 @@ bool Sema::CheckUsingDeclRedeclaration(SourceLocation UsingLoc, // using decls differ if they name different scopes (but note that // template instantiation can cause this check to trigger when it // didn't before instantiation). - if (Context.getCanonicalNestedNameSpecifier(Qual) != - Context.getCanonicalNestedNameSpecifier(DQual)) + if (CNNS != Context.getCanonicalNestedNameSpecifier(DQual)) continue; Diag(NameLoc, diag::err_using_decl_redeclaration) << SS.getRange(); diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 175388198324..5d26f2d2c11a 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -1079,7 +1079,7 @@ NamedDecl *Sema::ActOnTypeParameter(Scope *S, bool Typename, return Param; // Check the template argument itself. - if (CheckTemplateArgument(Param, DefaultTInfo)) { + if (CheckTemplateArgument(DefaultTInfo)) { Param->setInvalidDecl(); return Param; } @@ -5042,7 +5042,7 @@ bool Sema::CheckTemplateTypeArgument(TemplateTypeParmDecl *Param, } } - if (CheckTemplateArgument(Param, TSI)) + if (CheckTemplateArgument(TSI)) return true; // Add the converted template type argument. 
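The fromConstraintExprs rewrite above replaces the special-cased one- and two-element handling with a single left fold: the conjunction is seeded from E[0] and each remaining constraint expression is folded in. The kind of declaration being normalized, as a C++20 sketch with invented constraints:

    #include <concepts>

    // Three constraint expressions fold left to right into nested
    // conjunctions: ((integral<T> && sizeof(T) >= 4) && !same_as<T, bool>).
    template <typename T>
      requires std::integral<T> && (sizeof(T) >= 4) && (!std::same_as<T, bool>)
    void only_wide_ints(T) {}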
@@ -5661,7 +5661,7 @@ bool Sema::CheckTemplateArgumentList( TemplateArgumentListInfo NewArgs = TemplateArgs; // Make sure we get the template parameter list from the most - // recentdeclaration, since that is the only one that has is guaranteed to + // recent declaration, since that is the only one that is guaranteed to // have all the default template argument information. TemplateParameterList *Params = cast(Template->getMostRecentDecl()) @@ -6208,8 +6208,7 @@ bool UnnamedLocalNoLinkageFinder::VisitNestedNameSpecifier( /// /// This routine implements the semantics of C++ [temp.arg.type]. It /// returns true if an error occurred, and false otherwise. -bool Sema::CheckTemplateArgument(TemplateTypeParmDecl *Param, - TypeSourceInfo *ArgInfo) { +bool Sema::CheckTemplateArgument(TypeSourceInfo *ArgInfo) { assert(ArgInfo && "invalid TypeSourceInfo"); QualType Arg = ArgInfo->getType(); SourceRange SR = ArgInfo->getTypeLoc().getSourceRange(); diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index f18f77d3442a..74889aa3ca88 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1934,25 +1934,23 @@ TemplateInstantiator::TransformExprRequirement(concepts::ExprRequirement *Req) { return Req; Sema::SFINAETrap Trap(SemaRef); - TemplateDeductionInfo Info(Req->getExpr()->getBeginLoc()); llvm::PointerUnion TransExpr; if (Req->isExprSubstitutionFailure()) TransExpr = Req->getExprSubstitutionDiagnostic(); else { - Sema::InstantiatingTemplate ExprInst(SemaRef, Req->getExpr()->getBeginLoc(), - Req, Info, - Req->getExpr()->getSourceRange()); + Expr *E = Req->getExpr(); + TemplateDeductionInfo Info(E->getBeginLoc()); + Sema::InstantiatingTemplate ExprInst(SemaRef, E->getBeginLoc(), Req, Info, + E->getSourceRange()); if (ExprInst.isInvalid()) return nullptr; - ExprResult TransExprRes = TransformExpr(Req->getExpr()); + ExprResult TransExprRes = TransformExpr(E); if (TransExprRes.isInvalid() || Trap.hasErrorOccurred()) - TransExpr = createSubstDiag(SemaRef, Info, - [&] (llvm::raw_ostream& OS) { - Req->getExpr()->printPretty(OS, nullptr, - SemaRef.getPrintingPolicy()); - }); + TransExpr = createSubstDiag(SemaRef, Info, [&](llvm::raw_ostream &OS) { + E->printPretty(OS, nullptr, SemaRef.getPrintingPolicy()); + }); else TransExpr = TransExprRes.get(); } @@ -1966,6 +1964,7 @@ TemplateInstantiator::TransformExprRequirement(concepts::ExprRequirement *Req) { else if (RetReq.isTypeConstraint()) { TemplateParameterList *OrigTPL = RetReq.getTypeConstraintTemplateParameterList(); + TemplateDeductionInfo Info(OrigTPL->getTemplateLoc()); Sema::InstantiatingTemplate TPLInst(SemaRef, OrigTPL->getTemplateLoc(), Req, Info, OrigTPL->getSourceRange()); if (TPLInst.isInvalid()) diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc index 08a642469627..7d2097cfc297 100644 --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -129,6 +129,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::Type::getInt8PtrTy(Ctx), Next, \ #endif INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic()) INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version()) +INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters) INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) 
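With BinaryIdsSize now recorded right after Version, raw-profile readers must skip the binary IDs blob that sits between the header and the data records. A sketch of the reader-side arithmetic, with a deliberately abbreviated header struct (the generated header has more fields; compare the InstrProfilingMerge.c hunks below):

    #include <cstdint>

    // First fields of the version-7 raw profile header, as laid out by the
    // INSTR_PROF_RAW_HEADER entries above (remaining fields elided).
    struct RawHeaderPrefix {
      uint64_t Magic;
      uint64_t Version;
      uint64_t BinaryIdsSize;
      uint64_t DataSize;
    };

    // Data records begin after the full header plus the binary IDs blob.
    const char *dataBegin(const char *Profile, uint64_t FullHeaderSize,
                          const RawHeaderPrefix *H) {
      return Profile + FullHeaderSize + H->BinaryIdsSize;
    }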
@@ -137,7 +138,6 @@ INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin) INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) -INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) #undef INSTR_PROF_RAW_HEADER /* INSTR_PROF_RAW_HEADER end */ @@ -646,7 +646,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129 /* Raw profile format version (start from 1). */ -#define INSTR_PROF_RAW_VERSION 6 +#define INSTR_PROF_RAW_VERSION 7 /* Indexed profile format version (start from 1). */ #define INSTR_PROF_INDEX_VERSION 7 /* Coverage mapping format version (start from 0). */ diff --git a/compiler-rt/lib/profile/InstrProfilingBuffer.c b/compiler-rt/lib/profile/InstrProfilingBuffer.c index 21fa7ba1ddd6..68b4f5cd6f52 100644 --- a/compiler-rt/lib/profile/InstrProfilingBuffer.c +++ b/compiler-rt/lib/profile/InstrProfilingBuffer.c @@ -116,7 +116,7 @@ uint64_t __llvm_profile_get_size_for_buffer_internal( DataSize, CountersSize, NamesSize, &PaddingBytesBeforeCounters, &PaddingBytesAfterCounters, &PaddingBytesAfterNames); - return sizeof(__llvm_profile_header) + + return sizeof(__llvm_profile_header) + __llvm_write_binary_ids(NULL) + (DataSize * sizeof(__llvm_profile_data)) + PaddingBytesBeforeCounters + (CountersSize * sizeof(uint64_t)) + PaddingBytesAfterCounters + NamesSize + PaddingBytesAfterNames; diff --git a/compiler-rt/lib/profile/InstrProfilingMerge.c b/compiler-rt/lib/profile/InstrProfilingMerge.c index 913228513259..16ebc2f8b2a9 100644 --- a/compiler-rt/lib/profile/InstrProfilingMerge.c +++ b/compiler-rt/lib/profile/InstrProfilingMerge.c @@ -22,6 +22,7 @@ void (*VPMergeHook)(ValueProfData *, __llvm_profile_data *); COMPILER_RT_VISIBILITY uint64_t lprofGetLoadModuleSignature() { /* A very fast way to compute a module signature. */ + uint64_t Version = __llvm_profile_get_version(); uint64_t CounterSize = (uint64_t)(__llvm_profile_end_counters() - __llvm_profile_begin_counters()); uint64_t DataSize = __llvm_profile_get_data_size(__llvm_profile_begin_data(), @@ -33,7 +34,7 @@ uint64_t lprofGetLoadModuleSignature() { const __llvm_profile_data *FirstD = __llvm_profile_begin_data(); return (NamesSize << 40) + (CounterSize << 30) + (DataSize << 20) + - (NumVnodes << 10) + (DataSize > 0 ? FirstD->NameRef : 0); + (NumVnodes << 10) + (DataSize > 0 ? FirstD->NameRef : 0) + Version; } /* Returns 1 if profile is not structurally compatible. 
*/ @@ -44,7 +45,8 @@ int __llvm_profile_check_compatibility(const char *ProfileData, __llvm_profile_header *Header = (__llvm_profile_header *)ProfileData; __llvm_profile_data *SrcDataStart, *SrcDataEnd, *SrcData, *DstData; SrcDataStart = - (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header)); + (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header) + + Header->BinaryIdsSize); SrcDataEnd = SrcDataStart + Header->DataSize; if (ProfileSize < sizeof(__llvm_profile_header)) @@ -63,7 +65,7 @@ int __llvm_profile_check_compatibility(const char *ProfileData, Header->ValueKindLast != IPVK_Last) return 1; - if (ProfileSize < sizeof(__llvm_profile_header) + + if (ProfileSize < sizeof(__llvm_profile_header) + Header->BinaryIdsSize + Header->DataSize * sizeof(__llvm_profile_data) + Header->NamesSize + Header->CountersSize) return 1; @@ -91,7 +93,8 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData, const char *SrcValueProfDataStart, *SrcValueProfData; SrcDataStart = - (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header)); + (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header) + + Header->BinaryIdsSize); SrcDataEnd = SrcDataStart + Header->DataSize; SrcCountersStart = (uint64_t *)SrcDataEnd; SrcNameStart = (const char *)(SrcCountersStart + Header->CountersSize); diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c index 508624a80cd6..7c15f97aff89 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c @@ -17,6 +17,15 @@ #include "InstrProfiling.h" #include "InstrProfilingInternal.h" +#if defined(__FreeBSD__) && !defined(ElfW) +/* + * FreeBSD's elf.h and link.h headers do not define the ElfW(type) macro yet. + * If this is added to all supported FreeBSD versions in the future, this + * compatibility macro can be removed. + */ +#define ElfW(type) __ElfN(type) +#endif + #define PROF_DATA_START INSTR_PROF_SECT_START(INSTR_PROF_DATA_COMMON) #define PROF_DATA_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_DATA_COMMON) #define PROF_NAME_START INSTR_PROF_SECT_START(INSTR_PROF_NAME_COMMON) @@ -76,6 +85,7 @@ COMPILER_RT_VISIBILITY ValueProfNode *__llvm_profile_end_vnodes(void) { COMPILER_RT_VISIBILITY ValueProfNode *CurrentVNode = &PROF_VNODES_START; COMPILER_RT_VISIBILITY ValueProfNode *EndVNode = &PROF_VNODES_STOP; +#ifdef NT_GNU_BUILD_ID static size_t RoundUp(size_t size, size_t align) { return (size + align - 1) & ~(align - 1); } @@ -179,5 +189,14 @@ COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) { return 0; } +#else /* !NT_GNU_BUILD_ID */ +/* + * Fallback implementation for targets that don't support the GNU + * extensions NT_GNU_BUILD_ID and __ehdr_start. + */ +COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) { + return 0; +} +#endif #endif diff --git a/libcxx/include/__config b/libcxx/include/__config index 3cf23694f878..97e33f3157aa 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -354,6 +354,16 @@ # define _LIBCPP_NO_CFI #endif +// If the compiler supports using_if_exists, pretend we have those functions and they'll +// be picked up if the C library provides them. +// +// TODO: Once we drop support for Clang 12, we can assume the compiler supports using_if_exists +// for platforms that don't have a conforming C11 library, so we can drop this whole thing. 
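Roughly how libc++ consumes the attribute this block tests for, spelled out without the internal wrapper macro (a sketch):

    #include <ctime>

    namespace demo {
    // With a compiler providing using_if_exists (Clang >= 13), this
    // using-declaration is silently dropped when the C library does not
    // declare ::timespec_get, instead of being a hard error.
    using ::timespec_get __attribute__((__using_if_exists__));
    } // namespace demo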
+#if __has_attribute(using_if_exists) +# define _LIBCPP_HAS_TIMESPEC_GET +# define _LIBCPP_HAS_QUICK_EXIT +# define _LIBCPP_HAS_ALIGNED_ALLOC +#else #if (defined(__ISO_C_VISIBLE) && (__ISO_C_VISIBLE >= 2011)) || __cplusplus >= 201103L # if defined(__FreeBSD__) # define _LIBCPP_HAS_ALIGNED_ALLOC @@ -408,6 +418,7 @@ # endif # endif // __APPLE__ #endif +#endif // __has_attribute(using_if_exists) #ifndef _LIBCPP_CXX03_LANG # define _LIBCPP_ALIGNOF(_Tp) alignof(_Tp) diff --git a/libcxx/include/ctime b/libcxx/include/ctime index 8b2efd7449ca..2a3fdd12e874 100644 --- a/libcxx/include/ctime +++ b/libcxx/include/ctime @@ -59,7 +59,7 @@ int timespec_get( struct timespec *ts, int base); // C++17 // we're detecting this here instead of in <__config> because we can't include // system headers from <__config>, since it leads to circular module dependencies. // This is also meant to be a very temporary workaround until the SDKs are fixed. -#if defined(__APPLE__) +#if defined(__APPLE__) && !__has_attribute(using_if_exists) # include # if defined(_LIBCPP_HAS_TIMESPEC_GET) && (__DARWIN_C_LEVEL < __DARWIN_C_FULL) # define _LIBCPP_HAS_TIMESPEC_GET_NOT_ACTUALLY_PROVIDED diff --git a/libcxx/include/ios b/libcxx/include/ios index 3128bca89999..c9230d6a9484 100644 --- a/libcxx/include/ios +++ b/libcxx/include/ios @@ -607,8 +607,15 @@ public: static_assert((is_same<_CharT, typename traits_type::char_type>::value), "traits_type::char_type must be the same type as CharT"); +#ifdef _LIBCPP_CXX03_LANG + // Preserve the ability to compare with literal 0, + // and implicitly convert to bool, but not implicitly convert to int. + _LIBCPP_INLINE_VISIBILITY + operator void*() const {return fail() ? nullptr : (void*)this;} +#else _LIBCPP_INLINE_VISIBILITY explicit operator bool() const {return !fail();} +#endif _LIBCPP_INLINE_VISIBILITY bool operator!() const {return fail();} _LIBCPP_INLINE_VISIBILITY iostate rdstate() const {return ios_base::rdstate();} diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index a996a815599a..e1abb4dfab36 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -86,7 +86,8 @@ struct SymbolVersion { struct VersionDefinition { llvm::StringRef name; uint16_t id; - std::vector patterns; + std::vector nonLocalPatterns; + std::vector localPatterns; }; // This struct contains the global configuration for the linker. diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 91e7df21a60a..594c20016827 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1351,18 +1351,19 @@ static void readConfigs(opt::InputArgList &args) { } assert(config->versionDefinitions.empty()); - config->versionDefinitions.push_back({"local", (uint16_t)VER_NDX_LOCAL, {}}); config->versionDefinitions.push_back( - {"global", (uint16_t)VER_NDX_GLOBAL, {}}); + {"local", (uint16_t)VER_NDX_LOCAL, {}, {}}); + config->versionDefinitions.push_back( + {"global", (uint16_t)VER_NDX_GLOBAL, {}, {}}); // If --retain-symbol-file is used, we'll keep only the symbols listed in // the file and discard all others. 
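The nonLocalPatterns/localPatterns split introduced in Config.h above mirrors the two halves of a version node. An illustrative version script (invented names):

    LIBDEMO_1.0 {
      global:   /* collected into nonLocalPatterns of LIBDEMO_1.0 */
        demo_*;
      local:    /* collected into localPatterns, binding matches to VER_NDX_LOCAL */
        *;
    };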
if (auto *arg = args.getLastArg(OPT_retain_symbols_file)) {
- config->versionDefinitions[VER_NDX_LOCAL].patterns.push_back(
+ config->versionDefinitions[VER_NDX_LOCAL].nonLocalPatterns.push_back(
 {"*", /*isExternCpp=*/false, /*hasWildcard=*/true});
 if (Optional buffer = readFile(arg->getValue()))
 for (StringRef s : args::getLines(*buffer))
- config->versionDefinitions[VER_NDX_GLOBAL].patterns.push_back(
+ config->versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back(
 {s, /*isExternCpp=*/false, /*hasWildcard=*/false});
 }
@@ -2069,23 +2070,37 @@ static void redirectSymbols(ArrayRef wrapped) {
 if (suffix1[0] != '@' || suffix1[1] == '@')
 continue;
- // Check whether the default version foo@@v1 exists. If it exists, the
- // symbol can be found by the name "foo" in the symbol table.
- Symbol *maybeDefault = symtab->find(name);
- if (!maybeDefault)
+ // Check the existing symbol foo. We have two special cases to handle:
+ //
+ // * There is a definition of foo@v1 and foo@@v1.
+ // * There is a definition of foo@v1 and foo.
+ Defined *sym2 = dyn_cast_or_null(symtab->find(name));
+ if (!sym2)
 continue;
- const char *suffix2 = maybeDefault->getVersionSuffix();
- if (suffix2[0] != '@' || suffix2[1] != '@' ||
- strcmp(suffix1 + 1, suffix2 + 2) != 0)
- continue;
-
- // foo@v1 and foo@@v1 should be merged, so redirect foo@v1 to foo@@v1.
- map.try_emplace(sym, maybeDefault);
- // If both foo@v1 and foo@@v1 are defined and non-weak, report a duplicate
- // definition error.
- maybeDefault->resolve(*sym);
- // Eliminate foo@v1 from the symbol table.
- sym->symbolKind = Symbol::PlaceholderKind;
+ const char *suffix2 = sym2->getVersionSuffix();
+ if (suffix2[0] == '@' && suffix2[1] == '@' &&
+ strcmp(suffix1 + 1, suffix2 + 2) == 0) {
+ // foo@v1 and foo@@v1 should be merged, so redirect foo@v1 to foo@@v1.
+ map.try_emplace(sym, sym2);
+ // If both foo@v1 and foo@@v1 are defined and non-weak, report a duplicate
+ // definition error.
+ sym2->resolve(*sym);
+ // Eliminate foo@v1 from the symbol table.
+ sym->symbolKind = Symbol::PlaceholderKind;
+ } else if (auto *sym1 = dyn_cast(sym)) {
+ if (sym2->versionId > VER_NDX_GLOBAL
+ ? config->versionDefinitions[sym2->versionId].name == suffix1 + 1
+ : sym1->section == sym2->section && sym1->value == sym2->value) {
+ // Due to an assembler design flaw, if foo is defined, .symver foo,
+ // foo@v1 defines both foo and foo@v1. Unless foo is bound to a
+ // different version, GNU ld makes foo@v1 canonical and eliminates foo.
+ // Emulate its behavior, otherwise we would have foo or foo@@v1 beside
+ // foo@v1. foo@v1 and foo combining does not apply if they are not
+ // defined in the same place.
+ map.try_emplace(sym2, sym);
+ sym2->symbolKind = Symbol::PlaceholderKind;
+ }
+ }
 }
 if (map.empty())
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index a938984ad945..01785f39ed75 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -849,17 +849,8 @@ void LinkerScript::diagnoseOrphanHandling() const {
 }
 uint64_t LinkerScript::advance(uint64_t size, unsigned alignment) {
- bool isTbss =
- (ctx->outSec->flags & SHF_TLS) && ctx->outSec->type == SHT_NOBITS;
- uint64_t start = isTbss ?
dot + ctx->threadBssOffset : dot; - start = alignTo(start, alignment); - uint64_t end = start + size; - - if (isTbss) - ctx->threadBssOffset = end - dot; - else - dot = end; - return end; + dot = alignTo(dot, alignment) + size; + return dot; } void LinkerScript::output(InputSection *s) { @@ -931,13 +922,24 @@ static OutputSection *findFirstSection(PhdrEntry *load) { // This function assigns offsets to input sections and an output section // for a single sections command (e.g. ".text { *(.text); }"). void LinkerScript::assignOffsets(OutputSection *sec) { + const bool isTbss = (sec->flags & SHF_TLS) && sec->type == SHT_NOBITS; const bool sameMemRegion = ctx->memRegion == sec->memRegion; const bool prevLMARegionIsDefault = ctx->lmaRegion == nullptr; const uint64_t savedDot = dot; ctx->memRegion = sec->memRegion; ctx->lmaRegion = sec->lmaRegion; - if (sec->flags & SHF_ALLOC) { + if (!(sec->flags & SHF_ALLOC)) { + // Non-SHF_ALLOC sections have zero addresses. + dot = 0; + } else if (isTbss) { + // Allow consecutive SHF_TLS SHT_NOBITS output sections. The address range + // starts from the end address of the previous tbss section. + if (ctx->tbssAddr == 0) + ctx->tbssAddr = dot; + else + dot = ctx->tbssAddr; + } else { if (ctx->memRegion) dot = ctx->memRegion->curPos; if (sec->addrExpr) @@ -950,9 +952,6 @@ void LinkerScript::assignOffsets(OutputSection *sec) { if (ctx->memRegion && ctx->memRegion->curPos < dot) expandMemoryRegion(ctx->memRegion, dot - ctx->memRegion->curPos, ctx->memRegion->name, sec->name); - } else { - // Non-SHF_ALLOC sections have zero addresses. - dot = 0; } switchTo(sec); @@ -1008,8 +1007,13 @@ void LinkerScript::assignOffsets(OutputSection *sec) { // Non-SHF_ALLOC sections do not affect the addresses of other OutputSections // as they are not part of the process image. - if (!(sec->flags & SHF_ALLOC)) + if (!(sec->flags & SHF_ALLOC)) { dot = savedDot; + } else if (isTbss) { + // NOBITS TLS sections are similar. Additionally save the end address. + ctx->tbssAddr = dot; + dot = savedDot; + } } static bool isDiscardable(OutputSection &sec) { diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index 0592c52acb84..d2487ae0f9d2 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -247,11 +247,11 @@ class LinkerScript final { // not be used outside of the scope of a call to the above functions. struct AddressState { AddressState(); - uint64_t threadBssOffset = 0; OutputSection *outSec = nullptr; MemoryRegion *memRegion = nullptr; MemoryRegion *lmaRegion = nullptr; uint64_t lmaOffset = 0; + uint64_t tbssAddr = 0; }; llvm::DenseMap nameToOutputSection; diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index e3cc210972b2..537859f9e0b5 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -527,6 +527,13 @@ static SmallSet getSymbolsAt(SharedSymbol &ss) { if (auto *alias = dyn_cast_or_null(sym)) ret.insert(alias); } + + // The loop does not check SHT_GNU_verneed, so ret does not contain + // non-default version symbols. If ss has a non-default version, ret won't + // contain ss. Just add ss unconditionally. If a non-default version alias is + // separately copy relocated, it and ss will have different addresses. + // Fortunately this case is impractical and fails with GNU ld as well. 
+ ret.insert(&ss); return ret; } diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index 2c980eb810c7..1c743fd47747 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -1496,9 +1496,9 @@ void ScriptParser::readAnonymousDeclaration() { std::vector globals; std::tie(locals, globals) = readSymbols(); for (const SymbolVersion &pat : locals) - config->versionDefinitions[VER_NDX_LOCAL].patterns.push_back(pat); + config->versionDefinitions[VER_NDX_LOCAL].localPatterns.push_back(pat); for (const SymbolVersion &pat : globals) - config->versionDefinitions[VER_NDX_GLOBAL].patterns.push_back(pat); + config->versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back(pat); expect(";"); } @@ -1510,13 +1510,12 @@ void ScriptParser::readVersionDeclaration(StringRef verStr) { std::vector locals; std::vector globals; std::tie(locals, globals) = readSymbols(); - for (const SymbolVersion &pat : locals) - config->versionDefinitions[VER_NDX_LOCAL].patterns.push_back(pat); // Create a new version definition and add that to the global symbols. VersionDefinition ver; ver.name = verStr; - ver.patterns = globals; + ver.nonLocalPatterns = std::move(globals); + ver.localPatterns = std::move(locals); ver.id = config->versionDefinitions.size(); config->versionDefinitions.push_back(ver); diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index 70aea288c53f..22e6b4f92898 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -134,9 +134,20 @@ static bool canBeVersioned(const Symbol &sym) { StringMap> &SymbolTable::getDemangledSyms() { if (!demangledSyms) { demangledSyms.emplace(); + std::string demangled; for (Symbol *sym : symVector) - if (canBeVersioned(*sym)) - (*demangledSyms)[demangleItanium(sym->getName())].push_back(sym); + if (canBeVersioned(*sym)) { + StringRef name = sym->getName(); + size_t pos = name.find('@'); + if (pos == std::string::npos) + demangled = demangleItanium(name); + else if (pos + 1 == name.size() || name[pos + 1] == '@') + demangled = demangleItanium(name.substr(0, pos)); + else + demangled = + (demangleItanium(name.substr(0, pos)) + name.substr(pos)).str(); + (*demangledSyms)[demangled].push_back(sym); + } } return *demangledSyms; } @@ -150,19 +161,29 @@ std::vector SymbolTable::findByVersion(SymbolVersion ver) { return {}; } -std::vector SymbolTable::findAllByVersion(SymbolVersion ver) { +std::vector SymbolTable::findAllByVersion(SymbolVersion ver, + bool includeNonDefault) { std::vector res; SingleStringMatcher m(ver.name); + auto check = [&](StringRef name) { + size_t pos = name.find('@'); + if (!includeNonDefault) + return pos == StringRef::npos; + return !(pos + 1 < name.size() && name[pos + 1] == '@'); + }; if (ver.isExternCpp) { for (auto &p : getDemangledSyms()) if (m.match(p.first())) - res.insert(res.end(), p.second.begin(), p.second.end()); + for (Symbol *sym : p.second) + if (check(sym->getName())) + res.push_back(sym); return res; } for (Symbol *sym : symVector) - if (canBeVersioned(*sym) && m.match(sym->getName())) + if (canBeVersioned(*sym) && check(sym->getName()) && + m.match(sym->getName())) res.push_back(sym); return res; } @@ -172,7 +193,7 @@ void SymbolTable::handleDynamicList() { for (SymbolVersion &ver : config->dynamicList) { std::vector syms; if (ver.hasWildcard) - syms = findAllByVersion(ver); + syms = findAllByVersion(ver, /*includeNonDefault=*/true); else syms = findByVersion(ver); @@ -181,21 +202,13 @@ void SymbolTable::handleDynamicList() { } } -// Set symbol versions to symbols. 
This function handles patterns -// containing no wildcard characters. -void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId, - StringRef versionName) { - if (ver.hasWildcard) - return; - +// Set symbol versions to symbols. This function handles patterns containing no +// wildcard characters. Return false if no symbol definition matches ver. +bool SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId, + StringRef versionName, + bool includeNonDefault) { // Get a list of symbols which we need to assign the version to. std::vector syms = findByVersion(ver); - if (syms.empty()) { - if (!config->undefinedVersion) - error("version script assignment of '" + versionName + "' to symbol '" + - ver.name + "' failed: symbol not defined"); - return; - } auto getName = [](uint16_t ver) -> std::string { if (ver == VER_NDX_LOCAL) @@ -207,10 +220,11 @@ void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId, // Assign the version. for (Symbol *sym : syms) { - // Skip symbols containing version info because symbol versions - // specified by symbol names take precedence over version scripts. - // See parseSymbolVersion(). - if (sym->getName().contains('@')) + // For a non-local versionId, skip symbols containing version info because + // symbol versions specified by symbol names take precedence over version + // scripts. See parseSymbolVersion(). + if (!includeNonDefault && versionId != VER_NDX_LOCAL && + sym->getName().contains('@')) continue; // If the version has not been assigned, verdefIndex is -1. Use an arbitrary @@ -225,13 +239,15 @@ void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId, warn("attempt to reassign symbol '" + ver.name + "' of " + getName(sym->versionId) + " to " + getName(versionId)); } + return !syms.empty(); } -void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId) { +void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId, + bool includeNonDefault) { // Exact matching takes precedence over fuzzy matching, // so we set a version to a symbol only if no version has been assigned // to the symbol. This behavior is compatible with GNU. - for (Symbol *sym : findAllByVersion(ver)) + for (Symbol *sym : findAllByVersion(ver, includeNonDefault)) if (sym->verdefIndex == UINT32_C(-1)) { sym->verdefIndex = 0; sym->versionId = versionId; @@ -244,26 +260,60 @@ void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId) { // script file, the script does not actually define any symbol version, // but just specifies symbols visibilities. void SymbolTable::scanVersionScript() { + SmallString<128> buf; // First, we assign versions to exact matching symbols, // i.e. version definitions not containing any glob meta-characters. 
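In the scanVersionScript changes just below, every exact pattern is looked up twice: once as written and once as "name@version", so explicitly versioned definitions are covered too. The key is built with LLVM's Twine into a reused buffer; the idiom in isolation (an illustrative helper, not patch code):

    #include "llvm/ADT/SmallString.h"
    #include "llvm/ADT/Twine.h"

    // Twine defers the concatenation; toStringRef materializes it into the
    // caller's buffer, so one SmallString can serve many lookups without
    // per-pattern heap allocation.
    llvm::StringRef makeVersionedKey(llvm::StringRef Name, llvm::StringRef Ver,
                                     llvm::SmallString<128> &Buf) {
      Buf.clear();
      return (Name + "@" + Ver).toStringRef(Buf);
    }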
- for (VersionDefinition &v : config->versionDefinitions) - for (SymbolVersion &pat : v.patterns) - assignExactVersion(pat, v.id, v.name); + std::vector syms; + for (VersionDefinition &v : config->versionDefinitions) { + auto assignExact = [&](SymbolVersion pat, uint16_t id, StringRef ver) { + bool found = + assignExactVersion(pat, id, ver, /*includeNonDefault=*/false); + buf.clear(); + found |= assignExactVersion({(pat.name + "@" + v.name).toStringRef(buf), + pat.isExternCpp, /*hasWildCard=*/false}, + id, ver, /*includeNonDefault=*/true); + if (!found && !config->undefinedVersion) + errorOrWarn("version script assignment of '" + ver + "' to symbol '" + + pat.name + "' failed: symbol not defined"); + }; + for (SymbolVersion &pat : v.nonLocalPatterns) + if (!pat.hasWildcard) + assignExact(pat, v.id, v.name); + for (SymbolVersion pat : v.localPatterns) + if (!pat.hasWildcard) + assignExact(pat, VER_NDX_LOCAL, "local"); + } // Next, assign versions to wildcards that are not "*". Note that because the // last match takes precedence over previous matches, we iterate over the // definitions in the reverse order. - for (VersionDefinition &v : llvm::reverse(config->versionDefinitions)) - for (SymbolVersion &pat : v.patterns) + auto assignWildcard = [&](SymbolVersion pat, uint16_t id, StringRef ver) { + assignWildcardVersion(pat, id, /*includeNonDefault=*/false); + buf.clear(); + assignWildcardVersion({(pat.name + "@" + ver).toStringRef(buf), + pat.isExternCpp, /*hasWildCard=*/true}, + id, + /*includeNonDefault=*/true); + }; + for (VersionDefinition &v : llvm::reverse(config->versionDefinitions)) { + for (SymbolVersion &pat : v.nonLocalPatterns) if (pat.hasWildcard && pat.name != "*") - assignWildcardVersion(pat, v.id); + assignWildcard(pat, v.id, v.name); + for (SymbolVersion &pat : v.localPatterns) + if (pat.hasWildcard && pat.name != "*") + assignWildcard(pat, VER_NDX_LOCAL, v.name); + } // Then, assign versions to "*". In GNU linkers they have lower priority than // other wildcards. - for (VersionDefinition &v : config->versionDefinitions) - for (SymbolVersion &pat : v.patterns) + for (VersionDefinition &v : config->versionDefinitions) { + for (SymbolVersion &pat : v.nonLocalPatterns) if (pat.hasWildcard && pat.name == "*") - assignWildcardVersion(pat, v.id); + assignWildcard(pat, v.id, v.name); + for (SymbolVersion &pat : v.localPatterns) + if (pat.hasWildcard && pat.name == "*") + assignWildcard(pat, VER_NDX_LOCAL, v.name); + } // Symbol themselves might know their versions because symbols // can contain versions in the form of @. diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h index 507af8d2be75..54c4b1169ed1 100644 --- a/lld/ELF/SymbolTable.h +++ b/lld/ELF/SymbolTable.h @@ -65,12 +65,14 @@ public: private: std::vector findByVersion(SymbolVersion ver); - std::vector findAllByVersion(SymbolVersion ver); + std::vector findAllByVersion(SymbolVersion ver, + bool includeNonDefault); llvm::StringMap> &getDemangledSyms(); - void assignExactVersion(SymbolVersion ver, uint16_t versionId, - StringRef versionName); - void assignWildcardVersion(SymbolVersion ver, uint16_t versionId); + bool assignExactVersion(SymbolVersion ver, uint16_t versionId, + StringRef versionName, bool includeNonDefault); + void assignWildcardVersion(SymbolVersion ver, uint16_t versionId, + bool includeNonDefault); // The order the global symbols are in is not defined. We can use an arbitrary // order, but it has to be reproducible. That is true even when cross linking. 
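Much of the symbol-table logic above distinguishes default (foo@@v2) from non-default (foo@v1) versioned definitions. For reference, the usual GNU idiom that produces such symbols from C++ (illustrative, not part of this patch):

    // foo_old is exported as the non-default foo@v1, foo_new as the default
    // foo@@v2. Note the assembler quirk described in the Driver.cpp comment
    // earlier: ".symver foo, foo@v1" applied to a defined symbol foo creates
    // both foo and foo@v1.
    extern "C" int foo_old() { return 1; }
    extern "C" int foo_new() { return 2; }
    __asm__(".symver foo_old, foo@v1");
    __asm__(".symver foo_new, foo@@v2");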
diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp
index 496be33dd182..cef303f05f89 100644
--- a/lld/ELF/Symbols.cpp
+++ b/lld/ELF/Symbols.cpp
@@ -208,6 +208,9 @@ OutputSection *Symbol::getOutputSection() const {
 // If a symbol name contains '@', the characters after that is
 // a symbol version name. This function parses that.
 void Symbol::parseSymbolVersion() {
+ // Return if localized by a local: pattern in a version script.
+ if (versionId == VER_NDX_LOCAL)
+ return;
 StringRef s = getName();
 size_t pos = s.find('@');
 if (pos == 0 || pos == StringRef::npos)
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index a52ee4348f78..50af6e7d7939 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -24,6 +24,13 @@ Non-comprehensive list of changes in this release
 ELF Improvements
 ----------------
+* ``-z start-stop-gc`` is now supported and becomes the default.
+ (`D96914 `_)
+ (`rG6d2d3bd0 `_)
+* ``--shuffle-sections=<seed>`` has been changed to ``--shuffle-sections=<section-glob>=<seed>``.
+ If seed is -1, the matched input sections are reversed.
+ (`D98445 `_)
+ (`D98679 `_)
 * ``-Bsymbolic -Bsymbolic-functions`` has been changed to behave the same as
 ``-Bsymbolic-functions``. This matches GNU ld. (`D102461 `_)
 * ``-Bno-symbolic`` has been added.
@@ -32,6 +39,75 @@ ELF Improvements
 (`D103303 `_)
 * ``-Bsymbolic-non-weak-functions`` has been added as a ``STB_GLOBAL`` subset
 of ``-Bsymbolic-functions``. (`D102570 `_)
+* ``--no-allow-shlib-undefined`` has been improved to catch more cases.
+ (`D101996 `_)
+* ``__rela_iplt_start`` is no longer defined for -pie/-shared.
+ This makes GCC/Clang ``-static-pie`` built executables work.
+ (`rG8cb78e99 `_)
+* IRELATIVE/TLSDESC relocations now support ``-z rel``.
+ (`D100544 `_)
+* Section groups with a zero flag are now supported.
+ This is used by ``comdat nodeduplicate`` in LLVM IR.
+ (`D96636 `_)
+ (`D106228 `_)
+* Defined symbols are now resolved before undefined symbols to stabilize the behavior of archive member extraction.
+ (`D95985 `_)
+* ``STB_WEAK`` symbols are now preferred over COMMON symbols as a fix to a ``--fortran-common`` regression.
+ (`D105945 `_)
+* Absolute relocations referencing undef weak now produce dynamic relocations for -pie, matching GOT-generating relocations.
+ (`D105164 `_)
+* Exported symbols are now communicated to the LTO library so as to make LTO
+ based whole program devirtualization (``-flto=thin -fwhole-program-vtables``)
+ work with shared objects.
+ (`D91583 `_)
+* Whole program devirtualization now respects ``local:`` version nodes in a version script.
+ (`D98220 `_)
+ (`D98686 `_)
+* ``local:`` version nodes in a version script now apply to non-default version symbols.
+ (`D107234 `_)
+* If an object file defines both ``foo`` and ``foo@v1``, now only ``foo@v1`` will be in the output.
+ (`D107235 `_)
+* Copy relocations on non-default version symbols are now supported.
+ (`D107535 `_)
+
+Linker script changes:
+
+* ``.``, ``$``, and double quotes can now be used in symbol names in expressions.
+ (`D98306 `_)
+ (`rGe7a7ad13 `_)
+* Fixed value of ``.`` in the output section description of ``.tbss``.
+ (`D107288 `_)
+* ``NOLOAD`` sections can now be placed in a ``PT_LOAD`` program header.
+ (`D103815 `_)
+* ``OUTPUT_FORMAT(default, big, little)`` now consults ``-EL`` and ``-EB``.
+ (`D96214 `_)
+* The ``OVERWRITE_SECTIONS`` command has been added.
+ (`D103303 `_)
+* The section order within an ``INSERT AFTER`` command is now preserved.
+  (`D105158 `_)
+
+Architecture specific changes:
+
+* aarch64_be is now supported.
+  (`D96188 `_)
+* The AMDGPU port now supports ``--amdhsa-code-object-version=4`` object files.
+  (`D95811 `_)
+* The ARM port now accounts for PC biases in range extension thunk creation.
+  (`D97550 `_)
+* The AVR port now computes ``e_flags``.
+  (`D99754 `_)
+* The Mips port now omits unneeded dynamic relocations for PIE non-preemptible
+  TLS.
+  (`D101382 `_)
+* The PowerPC port now supports ``--power10-stubs=no`` to omit Power10
+  instructions from call stubs.
+  (`D94625 `_)
+* Fixed a thunk creation bug in the PowerPC port when TOC/NOTOC calls are
+  mixed.
+  (`D101837 `_)
+* The RISC-V port now resolves undefined weak relocations to the current
+  location if not using PLT.
+  (`D103001 `_)
+* ``R_386_GOTOFF`` relocations from .debug_info are now allowed, for
+  compatibility with GCC.
+  (`D95994 `_)
+* ``gotEntrySize`` has been added to improve support for the ILP32 ABI of
+  x86-64.
+  (`D102569 `_)

 Breaking changes
 ----------------
@@ -42,17 +118,75 @@ Breaking changes
 COFF Improvements
 -----------------

-* ...
+* Avoid thread exhaustion when running on 32-bit Windows.
+  (`D105506 `_)
+
+* Improve terminating the process on Windows while a thread pool might be
+  running. (`D102944 `_)

 MinGW Improvements
 ------------------

-* ...
+* Support for linking directly against a DLL without using an import library
+  has been added. (`D104530 `_ and
+  `D104531 `_)

-MachO Improvements
-------------------
+* Fix linking with ``--export-all-symbols`` in combination with
+  ``-function-sections``. (`D101522 `_ and
+  `D101615 `_)

-* Item 1.
+* Fix automatic export of symbols from LTO objects.
+  (`D101569 `_)
+
+* Accept more spellings of some options.
+  (`D107237 `_ and
+  `D107253 `_)
+
+Mach-O Improvements
+-------------------
+
+The Mach-O backend is now able to link several large, real-world programs,
+though we are still working out the kinks.
+
+* arm64 is now supported as a target. (`D88629 `_)
+* arm64_32 is now supported as a target. (`D99822 `_)
+* Branch-range-extension thunks are now supported. (`D100818 `_)
+* ``-dead_strip`` is now supported. (`D103324 `_)
+* Support for identical code folding (``--icf=all``) has been added.
+  (`D103292 `_)
+* Support for special ``$start`` and ``$end`` symbols for segments & sections
+  has been added. (`D106767 `_, `D106629 `_)
+* ``$ld$previous`` symbols are now supported. (`D103505 `_)
+* ``$ld$install_name`` symbols are now supported. (`D103746 `_)
+* ``__mh_*_header`` symbols are now supported. (`D97007 `_)
+* LC_CODE_SIGNATURE is now supported. (`D96164 `_)
+* LC_FUNCTION_STARTS is now supported. (`D97260 `_)
+* LC_DATA_IN_CODE is now supported. (`D103006 `_)
+* Bind opcodes are more compactly encoded. (`D106128 `_,
+  `D105075 `_)
+* LTO cache support has been added. (`D105922 `_)
+* ``-application_extension`` is now supported. (`D105818 `_)
+* ``-export_dynamic`` is now partially supported. (`D105482 `_)
+* ``-arch_multiple`` is now supported. (`D105450 `_)
+* ``-final_output`` is now supported. (`D105449 `_)
+* ``-umbrella`` is now supported. (`D105448 `_)
+* ``--print-dylib-search`` is now supported. (`D103985 `_)
+* ``-force_load_swift_libs`` is now supported. (`D103709 `_)
+* ``-reexport_framework``, ``-reexport_library``, ``-reexport-l`` are now
+  supported. (`D103497 `_)
+* ``.weak_def_can_be_hidden`` is now supported. (`D101080 `_)
+* ``-add_ast_path`` is now supported. (`D100076 `_)
+* ``-segprot`` is now supported.
+  (`D99389 `_)
+* ``-dependency_info`` is now partially supported. (`D98559 `_)
+* ``--time-trace`` is now supported. (`D98419 `_)
+* ``-mark_dead_strippable_dylib`` is now supported. (`D98262 `_)
+* ``-[un]exported_symbol[s_list]`` is now supported. (`D98223 `_)
+* ``-flat_namespace`` is now supported. (`D97641 `_)
+* ``-rename_section`` and ``-rename_segment`` are now supported.
+  (`D97600 `_)
+* ``-bundle_loader`` is now supported. (`D95913 `_)
+* ``-map`` is now partially supported. (`D98323 `_)
+
+There were numerous other bug-fixes as well.

 WebAssembly Improvements
 ------------------------
diff --git a/lldb/source/Commands/CommandObjectMemoryTag.cpp b/lldb/source/Commands/CommandObjectMemoryTag.cpp
index 1dfb32a92f3b..840f81719d7d 100644
--- a/lldb/source/Commands/CommandObjectMemoryTag.cpp
+++ b/lldb/source/Commands/CommandObjectMemoryTag.cpp
@@ -7,8 +7,11 @@
 //===----------------------------------------------------------------------===//

 #include "CommandObjectMemoryTag.h"
+#include "lldb/Host/OptionParser.h"
 #include "lldb/Interpreter/CommandReturnObject.h"
 #include "lldb/Interpreter/OptionArgParser.h"
+#include "lldb/Interpreter/OptionGroupFormat.h"
+#include "lldb/Interpreter/OptionValueString.h"
 #include "lldb/Target/Process.h"

 using namespace lldb;
@@ -21,7 +24,8 @@ class CommandObjectMemoryTagRead : public CommandObjectParsed {
 public:
   CommandObjectMemoryTagRead(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "tag",
-                            "Read memory tags for the given range of memory.",
+                            "Read memory tags for the given range of memory."
+                            " Mismatched tags will be marked.",
                             nullptr,
                             eCommandRequiresTarget | eCommandRequiresProcess |
                                 eCommandProcessMustBePaused) {
@@ -97,16 +101,17 @@ protected:
       return false;
     }

-    result.AppendMessageWithFormatv("Logical tag: {0:x}",
-                                    tag_manager->GetLogicalTag(start_addr));
+    lldb::addr_t logical_tag = tag_manager->GetLogicalTag(start_addr);
+    result.AppendMessageWithFormatv("Logical tag: {0:x}", logical_tag);
     result.AppendMessage("Allocation tags:");

     addr_t addr = tagged_range->GetRangeBase();
     for (auto tag : *tags) {
       addr_t next_addr = addr + tag_manager->GetGranuleSize();
       // Showing tagged addresses here until we have non-address bit handling
-      result.AppendMessageWithFormatv("[{0:x}, {1:x}): {2:x}", addr, next_addr,
-                                      tag);
+      result.AppendMessageWithFormatv("[{0:x}, {1:x}): {2:x}{3}", addr,
+                                      next_addr, tag,
+                                      logical_tag == tag ?
"" : " (mismatch)"); addr = next_addr; } @@ -115,6 +120,168 @@ protected: } }; +#define LLDB_OPTIONS_memory_tag_write +#include "CommandOptions.inc" + +class CommandObjectMemoryTagWrite : public CommandObjectParsed { +public: + class OptionGroupTagWrite : public OptionGroup { + public: + OptionGroupTagWrite() : OptionGroup(), m_end_addr(LLDB_INVALID_ADDRESS) {} + + ~OptionGroupTagWrite() override = default; + + llvm::ArrayRef GetDefinitions() override { + return llvm::makeArrayRef(g_memory_tag_write_options); + } + + Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_value, + ExecutionContext *execution_context) override { + Status status; + const int short_option = + g_memory_tag_write_options[option_idx].short_option; + + switch (short_option) { + case 'e': + m_end_addr = OptionArgParser::ToAddress(execution_context, option_value, + LLDB_INVALID_ADDRESS, &status); + break; + default: + llvm_unreachable("Unimplemented option"); + } + + return status; + } + + void OptionParsingStarting(ExecutionContext *execution_context) override { + m_end_addr = LLDB_INVALID_ADDRESS; + } + + lldb::addr_t m_end_addr; + }; + + CommandObjectMemoryTagWrite(CommandInterpreter &interpreter) + : CommandObjectParsed(interpreter, "tag", + "Write memory tags starting from the granule that " + "contains the given address.", + nullptr, + eCommandRequiresTarget | eCommandRequiresProcess | + eCommandProcessMustBePaused), + m_option_group(), m_tag_write_options() { + // Address + m_arguments.push_back( + CommandArgumentEntry{CommandArgumentData(eArgTypeAddressOrExpression)}); + // One or more tag values + m_arguments.push_back(CommandArgumentEntry{ + CommandArgumentData(eArgTypeValue, eArgRepeatPlus)}); + + m_option_group.Append(&m_tag_write_options); + m_option_group.Finalize(); + } + + ~CommandObjectMemoryTagWrite() override = default; + + Options *GetOptions() override { return &m_option_group; } + +protected: + bool DoExecute(Args &command, CommandReturnObject &result) override { + if (command.GetArgumentCount() < 2) { + result.AppendError("wrong number of arguments; expected " + " [ [...]]"); + return false; + } + + Status error; + addr_t start_addr = OptionArgParser::ToAddress( + &m_exe_ctx, command[0].ref(), LLDB_INVALID_ADDRESS, &error); + if (start_addr == LLDB_INVALID_ADDRESS) { + result.AppendErrorWithFormatv("Invalid address expression, {0}", + error.AsCString()); + return false; + } + + command.Shift(); // shift off start address + + std::vector tags; + for (auto &entry : command) { + lldb::addr_t tag_value; + // getAsInteger returns true on failure + if (entry.ref().getAsInteger(0, tag_value)) { + result.AppendErrorWithFormat( + "'%s' is not a valid unsigned decimal string value.\n", + entry.c_str()); + return false; + } + tags.push_back(tag_value); + } + + Process *process = m_exe_ctx.GetProcessPtr(); + llvm::Expected tag_manager_or_err = + process->GetMemoryTagManager(); + + if (!tag_manager_or_err) { + result.SetError(Status(tag_manager_or_err.takeError())); + return false; + } + + const MemoryTagManager *tag_manager = *tag_manager_or_err; + + MemoryRegionInfos memory_regions; + // If this fails the list of regions is cleared, so we don't need to read + // the return status here. + process->GetMemoryRegions(memory_regions); + + // We have to assume start_addr is not granule aligned. + // So if we simply made a range: + // (start_addr, start_addr + (N * granule_size)) + // We would end up with a range that isn't N granules but N+1 + // granules. 
To avoid this we'll align the start first using the method that + // doesn't check memory attributes. (if the final range is untagged we'll + // handle that error later) + lldb::addr_t aligned_start_addr = + tag_manager->ExpandToGranule(MemoryTagManager::TagRange(start_addr, 1)) + .GetRangeBase(); + + lldb::addr_t end_addr = 0; + // When you have an end address you want to align the range like tag read + // does. Meaning, align the start down (which we've done) and align the end + // up. + if (m_tag_write_options.m_end_addr != LLDB_INVALID_ADDRESS) + end_addr = m_tag_write_options.m_end_addr; + else + // Without an end address assume number of tags matches number of granules + // to write to + end_addr = + aligned_start_addr + (tags.size() * tag_manager->GetGranuleSize()); + + // Now we've aligned the start address so if we ask for another range + // using the number of tags N, we'll get back a range that is also N + // granules in size. + llvm::Expected tagged_range = + tag_manager->MakeTaggedRange(aligned_start_addr, end_addr, + memory_regions); + + if (!tagged_range) { + result.SetError(Status(tagged_range.takeError())); + return false; + } + + Status status = process->WriteMemoryTags(tagged_range->GetRangeBase(), + tagged_range->GetByteSize(), tags); + + if (status.Fail()) { + result.SetError(status); + return false; + } + + result.SetStatus(eReturnStatusSuccessFinishResult); + return true; + } + + OptionGroupOptions m_option_group; + OptionGroupTagWrite m_tag_write_options; +}; + CommandObjectMemoryTag::CommandObjectMemoryTag(CommandInterpreter &interpreter) : CommandObjectMultiword( interpreter, "tag", "Commands for manipulating memory tags", @@ -123,6 +290,11 @@ CommandObjectMemoryTag::CommandObjectMemoryTag(CommandInterpreter &interpreter) new CommandObjectMemoryTagRead(interpreter)); read_command_object->SetCommandName("memory tag read"); LoadSubCommand("read", read_command_object); + + CommandObjectSP write_command_object( + new CommandObjectMemoryTagWrite(interpreter)); + write_command_object->SetCommandName("memory tag write"); + LoadSubCommand("write", write_command_object); } CommandObjectMemoryTag::~CommandObjectMemoryTag() = default; diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 9c9b7c6e9b82..6abb4788bed0 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -504,6 +504,14 @@ let Command = "memory write" in { Desc<"Start writing bytes from an offset within the input file.">; } +let Command = "memory tag write" in { + def memory_write_end_addr : Option<"end-addr", "e">, Group<1>, + Arg<"AddressOrExpression">, Desc< + "Set tags for start address to end-addr, repeating tags as needed" + " to cover the range. 
(instead of calculating the range from the" + " number of tags given)">; +} + let Command = "register read" in { def register_read_alternate : Option<"alternate", "A">, Desc<"Display register names using the alternate register name if there " diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp index 5e69b5793f9f..8e1f6bc29a6f 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp @@ -3474,15 +3474,31 @@ GDBRemoteCommunicationServerLLGS::Handle_qMemTags( if (packet.GetBytesLeft() < 1 || packet.GetChar() != ':') return SendIllFormedResponse(packet, invalid_type_err); - int32_t type = - packet.GetS32(std::numeric_limits::max(), /*base=*/16); - if (type == std::numeric_limits::max() || + // Type is a signed integer but packed into the packet as its raw bytes. + // However, our GetU64 uses strtoull which allows +/-. We do not want this. + const char *first_type_char = packet.Peek(); + if (first_type_char && (*first_type_char == '+' || *first_type_char == '-')) + return SendIllFormedResponse(packet, invalid_type_err); + + // Extract type as unsigned then cast to signed. + // Using a uint64_t here so that we have some value outside of the 32 bit + // range to use as the invalid return value. + uint64_t raw_type = + packet.GetU64(std::numeric_limits::max(), /*base=*/16); + + if ( // Make sure the cast below would be valid + raw_type > std::numeric_limits::max() || // To catch inputs like "123aardvark" that will parse but clearly aren't // valid in this case. packet.GetBytesLeft()) { return SendIllFormedResponse(packet, invalid_type_err); } + // First narrow to 32 bits otherwise the copy into type would take + // the wrong 4 bytes on big endian. + uint32_t raw_type_32 = raw_type; + int32_t type = reinterpret_cast(raw_type_32); + StreamGDBRemote response; std::vector tags; Status error = m_current_process->ReadMemoryTags(type, addr, length, tags); @@ -3552,7 +3568,11 @@ GDBRemoteCommunicationServerLLGS::Handle_QMemTags( packet.GetU64(std::numeric_limits::max(), /*base=*/16); if (raw_type > std::numeric_limits::max()) return SendIllFormedResponse(packet, invalid_type_err); - int32_t type = static_cast(raw_type); + + // First narrow to 32 bits. Otherwise the copy below would get the wrong + // 4 bytes on big endian. 
+ uint32_t raw_type_32 = raw_type; + int32_t type = reinterpret_cast(raw_type_32); // Tag data if (packet.GetBytesLeft() < 1 || packet.GetChar() != ':') diff --git a/lldb/source/Symbol/TypeSystem.cpp b/lldb/source/Symbol/TypeSystem.cpp index 252b06e269d6..0b3f7e4f3bd4 100644 --- a/lldb/source/Symbol/TypeSystem.cpp +++ b/lldb/source/Symbol/TypeSystem.cpp @@ -223,62 +223,32 @@ void TypeSystemMap::ForEach(std::function const &callback) { llvm::Expected TypeSystemMap::GetTypeSystemForLanguage( lldb::LanguageType language, llvm::Optional create_callback) { - llvm::Error error = llvm::Error::success(); - assert(!error); // Check the success value when assertions are enabled std::lock_guard guard(m_mutex); - if (m_clear_in_progress) { - error = llvm::make_error( + if (m_clear_in_progress) + return llvm::make_error( "Unable to get TypeSystem because TypeSystemMap is being cleared", llvm::inconvertibleErrorCode()); - } else { - collection::iterator pos = m_map.find(language); - if (pos != m_map.end()) { - auto *type_system = pos->second.get(); - if (type_system) { - llvm::consumeError(std::move(error)); - return *type_system; - } - error = llvm::make_error( - "TypeSystem for language " + - llvm::StringRef(Language::GetNameForLanguageType(language)) + - " doesn't exist", - llvm::inconvertibleErrorCode()); - return std::move(error); - } - for (const auto &pair : m_map) { - if (pair.second && pair.second->SupportsLanguage(language)) { - // Add a new mapping for "language" to point to an already existing - // TypeSystem that supports this language - m_map[language] = pair.second; - if (pair.second.get()) { - llvm::consumeError(std::move(error)); - return *pair.second.get(); - } - error = llvm::make_error( - "TypeSystem for language " + - llvm::StringRef(Language::GetNameForLanguageType(language)) + - " doesn't exist", - llvm::inconvertibleErrorCode()); - return std::move(error); - } - } + collection::iterator pos = m_map.find(language); + if (pos != m_map.end()) { + auto *type_system = pos->second.get(); + if (type_system) + return *type_system; + return llvm::make_error( + "TypeSystem for language " + + llvm::StringRef(Language::GetNameForLanguageType(language)) + + " doesn't exist", + llvm::inconvertibleErrorCode()); + } - if (!create_callback) { - error = llvm::make_error( - "Unable to find type system for language " + - llvm::StringRef(Language::GetNameForLanguageType(language)), - llvm::inconvertibleErrorCode()); - } else { - // Cache even if we get a shared pointer that contains a null type system - // back - TypeSystemSP type_system_sp = (*create_callback)(); - m_map[language] = type_system_sp; - if (type_system_sp.get()) { - llvm::consumeError(std::move(error)); - return *type_system_sp.get(); - } - error = llvm::make_error( + for (const auto &pair : m_map) { + if (pair.second && pair.second->SupportsLanguage(language)) { + // Add a new mapping for "language" to point to an already existing + // TypeSystem that supports this language + m_map[language] = pair.second; + if (pair.second.get()) + return *pair.second.get(); + return llvm::make_error( "TypeSystem for language " + llvm::StringRef(Language::GetNameForLanguageType(language)) + " doesn't exist", @@ -286,7 +256,23 @@ llvm::Expected TypeSystemMap::GetTypeSystemForLanguage( } } - return std::move(error); + if (!create_callback) + return llvm::make_error( + "Unable to find type system for language " + + llvm::StringRef(Language::GetNameForLanguageType(language)), + llvm::inconvertibleErrorCode()); + + // Cache even if we get a shared pointer 
that contains a null type system + // back + TypeSystemSP type_system_sp = (*create_callback)(); + m_map[language] = type_system_sp; + if (type_system_sp.get()) + return *type_system_sp.get(); + return llvm::make_error( + "TypeSystem for language " + + llvm::StringRef(Language::GetNameForLanguageType(language)) + + " doesn't exist", + llvm::inconvertibleErrorCode()); } llvm::Expected diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 90ec742f18e6..f46e66641c08 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -744,6 +744,10 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// minimum/maximum flavor. CmpInst::Predicate getInverseMinMaxPred(SelectPatternFlavor SPF); + /// Return the minimum or maximum constant value for the specified integer + /// min/max flavor and type. + APInt getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth); + /// Check if the values in \p VL are select instructions that can be converted /// to a min or max (vector) intrinsic. Returns the intrinsic ID, if such a /// conversion is possible, together with a bool indicating whether all select diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h index 81e29d9b86e8..97aea5aedf22 100644 --- a/llvm/include/llvm/IR/Module.h +++ b/llvm/include/llvm/IR/Module.h @@ -324,6 +324,9 @@ public: /// name is not found. GlobalValue *getNamedValue(StringRef Name) const; + /// Return the number of global values in the module. + unsigned getNumNamedValues() const; + /// Return a unique non-zero ID for the specified metadata kind. This ID is /// uniqued across modules in the current LLVMContext. unsigned getMDKindID(StringRef Name) const; diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 08a934e6985f..c0cedb23bdcf 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -1104,6 +1104,7 @@ namespace RawInstrProf { // Version 5: Bit 60 of FuncHash is reserved for the flag for the context // sensitive records. // Version 6: Added binary id. +// Version 7: Reorder binary id and include version in signature. 
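[Editorial sketch, not part of the patch.] The effect of the version 7 reordering, applied by the InstrProfData.inc hunk below, is that BinaryIdsSize moves from the tail of the raw header to the slot directly after Version. A hypothetical flattened view of the resulting layout, listing only fields visible in this patch (the real header is generated from the INSTR_PROF_RAW_HEADER macro list, not declared as a struct):

#include <cstdint>

// Illustrative only: field order of the version 7 raw profile header.
struct RawProfHeaderV7 {
  uint64_t Magic;
  uint64_t Version;       // readers check GET_VERSION(Version) == 7 first
  uint64_t BinaryIdsSize; // moved up from the end of the header, so it can
                          // be read as soon as the version is validated
  uint64_t DataSize;
  uint64_t PaddingBytesBeforeCounters;
  uint64_t CountersSize;
  uint64_t PaddingBytesAfterCounters;
  uint64_t NamesSize;
  uint64_t CountersDelta;
  uint64_t NamesDelta;
  uint64_t ValueKindLast;
};

This matches the reader change further below, which now swaps in Header.BinaryIdsSize immediately after the version check rather than after ValueKindLast.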
const uint64_t Version = INSTR_PROF_RAW_VERSION; template inline uint64_t getMagic(); diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index 08a642469627..7d2097cfc297 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -129,6 +129,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::Type::getInt8PtrTy(Ctx), Next, \ #endif INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic()) INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version()) +INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters) INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) @@ -137,7 +138,6 @@ INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin) INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) -INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) #undef INSTR_PROF_RAW_HEADER /* INSTR_PROF_RAW_HEADER end */ @@ -646,7 +646,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129 /* Raw profile format version (start from 1). */ -#define INSTR_PROF_RAW_VERSION 6 +#define INSTR_PROF_RAW_VERSION 7 /* Indexed profile format version (start from 1). */ #define INSTR_PROF_INDEX_VERSION 7 /* Coverage mapping format version (start from 0). */ diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index c93b8adcc890..c3c12fd23746 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1855,6 +1855,10 @@ public: /// static void createShallowWrapper(Function &F); + /// Returns true if the function \p F can be internalized. i.e. it has a + /// compatible linkage. + static bool isInternalizable(Function &F); + /// Make another copy of the function \p F such that the copied version has /// internal linkage afterwards and can be analysed. Then we replace all uses /// of the original function to the copied one @@ -1870,6 +1874,22 @@ public: /// null pointer. static Function *internalizeFunction(Function &F, bool Force = false); + /// Make copies of each function in the set \p FnSet such that the copied + /// version has internal linkage afterwards and can be analysed. Then we + /// replace all uses of the original function to the copied one. The map + /// \p FnMap contains a mapping of functions to their internalized versions. + /// + /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr` + /// linkage can be internalized because these linkages guarantee that other + /// definitions with the same name have the same semantics as this one. + /// + /// This version will internalize all the functions in the set \p FnSet at + /// once and then replace the uses. This prevents internalized functions being + /// called by external functions when there is an internalized version in the + /// module. + static bool internalizeFunctions(SmallPtrSetImpl &FnSet, + DenseMap &FnMap); + /// Return the data layout associated with the anchor scope. 
const DataLayout &getDataLayout() const { return InfoCache.DL; } diff --git a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h index c4030735d965..c922476ac79d 100644 --- a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h +++ b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h @@ -51,11 +51,13 @@ #define LLVM_TRANSFORMS_UTILS_PREDICATEINFO_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" namespace llvm { @@ -176,7 +178,7 @@ public: class PredicateInfo { public: PredicateInfo(Function &, DominatorTree &, AssumptionCache &); - ~PredicateInfo() = default; + ~PredicateInfo(); void verifyPredicateInfo() const; @@ -203,6 +205,8 @@ private: // the Predicate Info, they belong to the ValueInfo structs in the ValueInfos // vector. DenseMap PredicateMap; + // The set of ssa_copy declarations we created with our custom mangling. + SmallSet, 20> CreatedDeclarations; }; // This pass does eager building and then printing of PredicateInfo. It is used diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h index 8662dbf385dc..59bf3a342caa 100644 --- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h +++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h @@ -83,6 +83,9 @@ class SCEVExpander : public SCEVVisitor { /// InsertedValues/InsertedPostIncValues. SmallPtrSet ReusedValues; + // The induction variables generated. + SmallVector InsertedIVs; + /// A memoization of the "relevant" loop for a given SCEV. DenseMap RelevantLoops; @@ -199,9 +202,11 @@ public: InsertedPostIncValues.clear(); ReusedValues.clear(); ChainedPhis.clear(); + InsertedIVs.clear(); } ScalarEvolution *getSE() { return &SE; } + const SmallVectorImpl &getInsertedIVs() const { return InsertedIVs; } /// Return a vector containing all instructions inserted during expansion. SmallVector getAllInsertedInstructions() const { diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 23083bc8178e..69ab0052b0a7 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4080,6 +4080,22 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, std::swap(TrueVal, FalseVal); } + // Check for integer min/max with a limit constant: + // X > MIN_INT ? X : MIN_INT --> X + // X < MAX_INT ? 
X : MAX_INT --> X + if (TrueVal->getType()->isIntOrIntVectorTy()) { + Value *X, *Y; + SelectPatternFlavor SPF = + matchDecomposedSelectPattern(cast(CondVal), TrueVal, FalseVal, + X, Y).Flavor; + if (SelectPatternResult::isMinOrMax(SPF) && Pred == getMinMaxPred(SPF)) { + APInt LimitC = getMinMaxLimit(getInverseMinMaxFlavor(SPF), + X->getType()->getScalarSizeInBits()); + if (match(Y, m_SpecificInt(LimitC))) + return X; + } + } + if (Pred == ICmpInst::ICMP_EQ && match(CmpRHS, m_Zero())) { Value *X; const APInt *Y; diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 522d21812c6a..6e3ca5c4e08a 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -6253,6 +6253,16 @@ CmpInst::Predicate llvm::getInverseMinMaxPred(SelectPatternFlavor SPF) { return getMinMaxPred(getInverseMinMaxFlavor(SPF)); } +APInt llvm::getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth) { + switch (SPF) { + case SPF_SMAX: return APInt::getSignedMaxValue(BitWidth); + case SPF_SMIN: return APInt::getSignedMinValue(BitWidth); + case SPF_UMAX: return APInt::getMaxValue(BitWidth); + case SPF_UMIN: return APInt::getMinValue(BitWidth); + default: llvm_unreachable("Unexpected flavor"); + } +} + std::pair llvm::canConvertToMinOrMaxIntrinsic(ArrayRef VL) { // Check if VL contains select instructions that can be folded into a min/max diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 1bba7232eb14..4f730b2cf372 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -20560,8 +20560,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // otherwise => (extract_subvec V1, ExtIdx) uint64_t InsIdx = V.getConstantOperandVal(2); if (InsIdx * SmallVT.getScalarSizeInBits() == - ExtIdx * NVT.getScalarSizeInBits()) + ExtIdx * NVT.getScalarSizeInBits()) { + if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT)) + return SDValue(); + return DAG.getBitcast(NVT, V.getOperand(1)); + } return DAG.getNode( ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)), diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index add34eccc1f3..de096f95afcb 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -677,8 +677,9 @@ calcUniqueIDUpdateFlagsAndSize(const GlobalObject *GO, StringRef SectionName, } if (Retain) { - if (Ctx.getAsmInfo()->useIntegratedAssembler() || - Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) + if ((Ctx.getAsmInfo()->useIntegratedAssembler() || + Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) && + !TM.getTargetTriple().isOSSolaris()) Flags |= ELF::SHF_GNU_RETAIN; return NextUniqueID++; } @@ -855,8 +856,10 @@ static MCSection *selectELFSectionForGlobal( EmitUniqueSection = true; Flags |= ELF::SHF_LINK_ORDER; } - if (Retain && (Ctx.getAsmInfo()->useIntegratedAssembler() || - Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36))) { + if (Retain && + (Ctx.getAsmInfo()->useIntegratedAssembler() || + Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) && + !TM.getTargetTriple().isOSSolaris()) { EmitUniqueSection = true; Flags |= ELF::SHF_GNU_RETAIN; } diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 5f05aa2e94e7..e1e28d1230b0 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -349,200 +349,6 @@ static Constant 
*ExtractConstantBytes(Constant *C, unsigned ByteStart, } } -/// Wrapper around getFoldedSizeOfImpl() that adds caching. -static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded, - DenseMap &Cache); - -/// Return a ConstantExpr with type DestTy for sizeof on Ty, with any known -/// factors factored out. If Folded is false, return null if no factoring was -/// possible, to avoid endlessly bouncing an unfoldable expression back into the -/// top-level folder. -static Constant *getFoldedSizeOfImpl(Type *Ty, Type *DestTy, bool Folded, - DenseMap &Cache) { - // This is the actual implementation of getFoldedSizeOf(). To get the caching - // behavior, we need to call getFoldedSizeOf() when we recurse. - - if (ArrayType *ATy = dyn_cast(Ty)) { - Constant *N = ConstantInt::get(DestTy, ATy->getNumElements()); - Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true, Cache); - return ConstantExpr::getNUWMul(E, N); - } - - if (StructType *STy = dyn_cast(Ty)) - if (!STy->isPacked()) { - unsigned NumElems = STy->getNumElements(); - // An empty struct has size zero. - if (NumElems == 0) - return ConstantExpr::getNullValue(DestTy); - // Check for a struct with all members having the same size. - Constant *MemberSize = - getFoldedSizeOf(STy->getElementType(0), DestTy, true, Cache); - bool AllSame = true; - for (unsigned i = 1; i != NumElems; ++i) - if (MemberSize != - getFoldedSizeOf(STy->getElementType(i), DestTy, true, Cache)) { - AllSame = false; - break; - } - if (AllSame) { - Constant *N = ConstantInt::get(DestTy, NumElems); - return ConstantExpr::getNUWMul(MemberSize, N); - } - } - - // Pointer size doesn't depend on the pointee type, so canonicalize them - // to an arbitrary pointee. - if (PointerType *PTy = dyn_cast(Ty)) - if (!PTy->getElementType()->isIntegerTy(1)) - return getFoldedSizeOf( - PointerType::get(IntegerType::get(PTy->getContext(), 1), - PTy->getAddressSpace()), - DestTy, true, Cache); - - // If there's no interesting folding happening, bail so that we don't create - // a constant that looks like it needs folding but really doesn't. - if (!Folded) - return nullptr; - - // Base case: Get a regular sizeof expression. - Constant *C = ConstantExpr::getSizeOf(Ty); - C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, - DestTy, false), - C, DestTy); - return C; -} - -static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded, - DenseMap &Cache) { - // Check for previously generated folded size constant. - auto It = Cache.find(Ty); - if (It != Cache.end()) - return It->second; - return Cache[Ty] = getFoldedSizeOfImpl(Ty, DestTy, Folded, Cache); -} - -static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded) { - DenseMap Cache; - return getFoldedSizeOf(Ty, DestTy, Folded, Cache); -} - -/// Return a ConstantExpr with type DestTy for alignof on Ty, with any known -/// factors factored out. If Folded is false, return null if no factoring was -/// possible, to avoid endlessly bouncing an unfoldable expression back into the -/// top-level folder. -static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy, bool Folded) { - // The alignment of an array is equal to the alignment of the - // array element. Note that this is not always true for vectors. 
- if (ArrayType *ATy = dyn_cast(Ty)) { - Constant *C = ConstantExpr::getAlignOf(ATy->getElementType()); - C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, - DestTy, - false), - C, DestTy); - return C; - } - - if (StructType *STy = dyn_cast(Ty)) { - // Packed structs always have an alignment of 1. - if (STy->isPacked()) - return ConstantInt::get(DestTy, 1); - - // Otherwise, struct alignment is the maximum alignment of any member. - // Without target data, we can't compare much, but we can check to see - // if all the members have the same alignment. - unsigned NumElems = STy->getNumElements(); - // An empty struct has minimal alignment. - if (NumElems == 0) - return ConstantInt::get(DestTy, 1); - // Check for a struct with all members having the same alignment. - Constant *MemberAlign = - getFoldedAlignOf(STy->getElementType(0), DestTy, true); - bool AllSame = true; - for (unsigned i = 1; i != NumElems; ++i) - if (MemberAlign != getFoldedAlignOf(STy->getElementType(i), DestTy, true)) { - AllSame = false; - break; - } - if (AllSame) - return MemberAlign; - } - - // Pointer alignment doesn't depend on the pointee type, so canonicalize them - // to an arbitrary pointee. - if (PointerType *PTy = dyn_cast(Ty)) - if (!PTy->getElementType()->isIntegerTy(1)) - return - getFoldedAlignOf(PointerType::get(IntegerType::get(PTy->getContext(), - 1), - PTy->getAddressSpace()), - DestTy, true); - - // If there's no interesting folding happening, bail so that we don't create - // a constant that looks like it needs folding but really doesn't. - if (!Folded) - return nullptr; - - // Base case: Get a regular alignof expression. - Constant *C = ConstantExpr::getAlignOf(Ty); - C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, - DestTy, false), - C, DestTy); - return C; -} - -/// Return a ConstantExpr with type DestTy for offsetof on Ty and FieldNo, with -/// any known factors factored out. If Folded is false, return null if no -/// factoring was possible, to avoid endlessly bouncing an unfoldable expression -/// back into the top-level folder. -static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo, Type *DestTy, - bool Folded) { - if (ArrayType *ATy = dyn_cast(Ty)) { - Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false, - DestTy, false), - FieldNo, DestTy); - Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true); - return ConstantExpr::getNUWMul(E, N); - } - - if (StructType *STy = dyn_cast(Ty)) - if (!STy->isPacked()) { - unsigned NumElems = STy->getNumElements(); - // An empty struct has no members. - if (NumElems == 0) - return nullptr; - // Check for a struct with all members having the same size. - Constant *MemberSize = - getFoldedSizeOf(STy->getElementType(0), DestTy, true); - bool AllSame = true; - for (unsigned i = 1; i != NumElems; ++i) - if (MemberSize != - getFoldedSizeOf(STy->getElementType(i), DestTy, true)) { - AllSame = false; - break; - } - if (AllSame) { - Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, - false, - DestTy, - false), - FieldNo, DestTy); - return ConstantExpr::getNUWMul(MemberSize, N); - } - } - - // If there's no interesting folding happening, bail so that we don't create - // a constant that looks like it needs folding but really doesn't. - if (!Folded) - return nullptr; - - // Base case: Get a regular offsetof expression. 
- Constant *C = ConstantExpr::getOffsetOf(Ty, FieldNo); - C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, - DestTy, false), - C, DestTy); - return C; -} - Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, Type *DestTy) { if (isa(V)) @@ -666,53 +472,6 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, // Is it a null pointer value? if (V->isNullValue()) return ConstantInt::get(DestTy, 0); - // If this is a sizeof-like expression, pull out multiplications by - // known factors to expose them to subsequent folding. If it's an - // alignof-like expression, factor out known factors. - if (ConstantExpr *CE = dyn_cast(V)) - if (CE->getOpcode() == Instruction::GetElementPtr && - CE->getOperand(0)->isNullValue()) { - // FIXME: Looks like getFoldedSizeOf(), getFoldedOffsetOf() and - // getFoldedAlignOf() don't handle the case when DestTy is a vector of - // pointers yet. We end up in asserts in CastInst::getCastOpcode (see - // test/Analysis/ConstantFolding/cast-vector.ll). I've only seen this - // happen in one "real" C-code test case, so it does not seem to be an - // important optimization to handle vectors here. For now, simply bail - // out. - if (DestTy->isVectorTy()) - return nullptr; - GEPOperator *GEPO = cast(CE); - Type *Ty = GEPO->getSourceElementType(); - if (CE->getNumOperands() == 2) { - // Handle a sizeof-like expression. - Constant *Idx = CE->getOperand(1); - bool isOne = isa(Idx) && cast(Idx)->isOne(); - if (Constant *C = getFoldedSizeOf(Ty, DestTy, !isOne)) { - Idx = ConstantExpr::getCast(CastInst::getCastOpcode(Idx, true, - DestTy, false), - Idx, DestTy); - return ConstantExpr::getMul(C, Idx); - } - } else if (CE->getNumOperands() == 3 && - CE->getOperand(1)->isNullValue()) { - // Handle an alignof-like expression. - if (StructType *STy = dyn_cast(Ty)) - if (!STy->isPacked()) { - ConstantInt *CI = cast(CE->getOperand(2)); - if (CI->isOne() && - STy->getNumElements() == 2 && - STy->getElementType(0)->isIntegerTy(1)) { - return getFoldedAlignOf(STy->getElementType(1), DestTy, false); - } - } - // Handle an offsetof-like expression. - if (Ty->isStructTy() || Ty->isArrayTy()) { - if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2), - DestTy, false)) - return C; - } - } - } // Other pointer types cannot be casted return nullptr; case Instruction::UIToFP: diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp index 7c18dc0ed299..63ea41fba89a 100644 --- a/llvm/lib/IR/Module.cpp +++ b/llvm/lib/IR/Module.cpp @@ -114,6 +114,10 @@ GlobalValue *Module::getNamedValue(StringRef Name) const { return cast_or_null(getValueSymbolTable().lookup(Name)); } +unsigned Module::getNumNamedValues() const { + return getValueSymbolTable().size(); +} + /// getMDKindID - Return a unique non-zero ID for the specified metadata kind. /// This ID is uniqued across modules in the current LLVMContext. 
unsigned Module::getMDKindID(StringRef Name) const { diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 8a4470ae207d..a0460062f307 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -366,6 +366,7 @@ Error RawInstrProfReader::readHeader( if (GET_VERSION(Version) != RawInstrProf::Version) return error(instrprof_error::unsupported_version); + BinaryIdsSize = swap(Header.BinaryIdsSize); CountersDelta = swap(Header.CountersDelta); NamesDelta = swap(Header.NamesDelta); auto DataSize = swap(Header.DataSize); @@ -374,7 +375,6 @@ Error RawInstrProfReader::readHeader( auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters); NamesSize = swap(Header.NamesSize); ValueKindLast = swap(Header.ValueKindLast); - BinaryIdsSize = swap(Header.BinaryIdsSize); auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData); auto PaddingSize = getNumPaddingBytes(NamesSize); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index ca6b87a5ebb0..b27a02b8c182 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4353,8 +4353,13 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op, if (IsFixedLength) { assert(Subtarget->useSVEForFixedLengthVectors() && "Cannot lower when not using SVE for fixed vectors"); - IndexVT = getContainerForFixedLengthVector(DAG, IndexVT); - MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType()); + if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) { + IndexVT = getContainerForFixedLengthVector(DAG, IndexVT); + MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType()); + } else { + MemVT = getContainerForFixedLengthVector(DAG, MemVT); + IndexVT = MemVT.changeTypeToInteger(); + } InputVT = DAG.getValueType(MemVT.changeTypeToInteger()); Mask = DAG.getNode( ISD::ZERO_EXTEND, DL, @@ -4453,8 +4458,13 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op, if (IsFixedLength) { assert(Subtarget->useSVEForFixedLengthVectors() && "Cannot lower when not using SVE for fixed vectors"); - IndexVT = getContainerForFixedLengthVector(DAG, IndexVT); - MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType()); + if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) { + IndexVT = getContainerForFixedLengthVector(DAG, IndexVT); + MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType()); + } else { + MemVT = getContainerForFixedLengthVector(DAG, MemVT); + IndexVT = MemVT.changeTypeToInteger(); + } InputVT = DAG.getValueType(MemVT.changeTypeToInteger()); StoreVal = diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index b03d421d3e6d..091a62aa4ada 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1120,6 +1120,16 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, if (!MI.getOperand(1).isReg()) return false; + auto NormalizeCmpValue = [](int64_t Value) -> int { + // Comparison immediates may be 64-bit, but CmpValue is only an int. + // Normalize to 0/1/2 return value, where 2 indicates any value apart from + // 0 or 1. + // TODO: Switch CmpValue to int64_t in the API to avoid this. 
+ if (Value == 0 || Value == 1) + return Value; + return 2; + }; + switch (MI.getOpcode()) { default: break; @@ -1155,8 +1165,7 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, SrcReg = MI.getOperand(1).getReg(); SrcReg2 = 0; CmpMask = ~0; - // FIXME: In order to convert CmpValue to 0 or 1 - CmpValue = MI.getOperand(2).getImm() != 0; + CmpValue = NormalizeCmpValue(MI.getOperand(2).getImm()); return true; case AArch64::ANDSWri: case AArch64::ANDSXri: @@ -1165,14 +1174,9 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, SrcReg = MI.getOperand(1).getReg(); SrcReg2 = 0; CmpMask = ~0; - // FIXME:The return val type of decodeLogicalImmediate is uint64_t, - // while the type of CmpValue is int. When converting uint64_t to int, - // the high 32 bits of uint64_t will be lost. - // In fact it causes a bug in spec2006-483.xalancbmk - // CmpValue is only used to compare with zero in OptimizeCompareInstr - CmpValue = AArch64_AM::decodeLogicalImmediate( + CmpValue = NormalizeCmpValue(AArch64_AM::decodeLogicalImmediate( MI.getOperand(2).getImm(), - MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0; + MI.getOpcode() == AArch64::ANDSWri ? 32 : 64)); return true; } @@ -1462,10 +1466,9 @@ bool AArch64InstrInfo::optimizeCompareInstr( if (CmpInstr.getOpcode() == AArch64::PTEST_PP) return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI); - // Continue only if we have a "ri" where immediate is zero. - // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare - // function. - assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!"); + // Warning: CmpValue == 2 indicates *any* value apart from 0 or 1. + assert((CmpValue == 0 || CmpValue == 1 || CmpValue == 2) && + "CmpValue must be 0, 1, or 2!"); if (SrcReg2 != 0) return false; @@ -1473,9 +1476,10 @@ bool AArch64InstrInfo::optimizeCompareInstr( if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg())) return false; - if (!CmpValue && substituteCmpToZero(CmpInstr, SrcReg, *MRI)) + if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI)) return true; - return removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI); + return (CmpValue == 0 || CmpValue == 1) && + removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI); } /// Get opcode of S version of Instr. 
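[Editorial sketch, not part of the patch.] To make the new contract concrete, here is a standalone copy of the normalization above; normalizeCmpValue as a free function is a hypothetical name for the NormalizeCmpValue lambda. Immediates are folded into the int-sized CmpValue as 0, 1, or the sentinel 2 ("any other value"), so a 64-bit immediate can no longer be silently truncated the way the old decodeLogicalImmediate-to-int conversion was.

#include <cassert>
#include <cstdint>

// Only 0 and 1 are meaningful to optimizeCompareInstr
// (substituteCmpToZero / removeCmpToZeroOrOne); everything else
// collapses to the sentinel 2.
static int normalizeCmpValue(int64_t Value) {
  if (Value == 0 || Value == 1)
    return static_cast<int>(Value);
  return 2;
}

int main() {
  assert(normalizeCmpValue(0) == 0); // eligible for substituteCmpToZero
  assert(normalizeCmpValue(1) == 1); // eligible for removeCmpToZeroOrOne
  // A value whose low 32 bits are zero: a plain uint64_t -> int truncation
  // would have turned this into 0; the sentinel keeps it distinct.
  assert(normalizeCmpValue(INT64_C(0x100000000)) == 2);
  return 0;
}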
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 2167ad5d7467..e68a3aa8bf47 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1647,7 +1647,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, "CMP_SWAP not expected to be custom expanded for Thumb1"); assert((UxtOp == 0 || UxtOp == ARM::tUXTB || UxtOp == ARM::tUXTH) && "ARMv8-M.baseline does not have t2UXTB/t2UXTH"); - assert(ARM::tGPRRegClass.contains(DesiredReg) && + assert((UxtOp == 0 || ARM::tGPRRegClass.contains(DesiredReg)) && "DesiredReg used for UXT op must be tGPR"); } diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h index 3bc5556a62f4..417e8b6ffec3 100644 --- a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h +++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h @@ -54,6 +54,24 @@ public: return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); } + + InstructionCost getArithmeticInstrCost( + unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, + TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, + TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, + TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, + ArrayRef Args = ArrayRef(), + const Instruction *CxtI = nullptr) { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + if (ISD == ISD::ADD && CostKind == TTI::TCK_RecipThroughput) + return SCEVCheapExpansionBudget.getValue() + 1; + + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, + Opd2PropInfo); + } }; } // end namespace llvm diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index d5a7873bd056..abf5b213bbac 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -485,6 +485,9 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, case Intrinsic::experimental_constrained_sin: case Intrinsic::experimental_constrained_cos: return true; + // There is no corresponding FMA instruction for PPC double double. + // Thus, we need to disable CTR loop generation for this type. 
+ case Intrinsic::fmuladd: case Intrinsic::copysign: if (CI->getArgOperand(0)->getType()->getScalarType()-> isPPC_FP128Ty()) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 342497150d49..8af3c8f5cfdb 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -77,6 +77,39 @@ def simm5_plus1 : Operand, ImmLeaf: Sched <[!cast("WriteVMov" # n # "V"), + !cast("ReadVMov" # n # "V")]>; + +class VLESched : Sched <[!cast("WriteVLDE" # n), + ReadVLDX, ReadVMask]>; + +class VSESched : Sched <[!cast("WriteVSTE" # n), + !cast("ReadVSTE" # n # "V"), + ReadVSTX, ReadVMask]>; + +class VLSSched : Sched <[!cast("WriteVLDS" # n), + ReadVLDX, ReadVLDSX, ReadVMask]>; + +class VSSSched : Sched <[!cast("WriteVSTS" # n), + !cast("ReadVSTS" # n # "V"), + ReadVSTX, ReadVSTSX, ReadVMask]>; + +class VLXSched : + Sched <[!cast("WriteVLD" # o # "X" # n), + ReadVLDX, !cast("ReadVLD" # o # "XV"), ReadVMask]>; + +class VSXSched : + Sched <[!cast("WriteVST" # o # "X" # n), + !cast("ReadVST" # o # "X" # n), + ReadVSTX, !cast("ReadVST" # o # "XV"), ReadVMask]>; + +class VLFSched : Sched <[!cast("WriteVLDFF" # n), + ReadVLDX, ReadVMask]>; + //===----------------------------------------------------------------------===// // Instruction class templates //===----------------------------------------------------------------------===// @@ -328,106 +361,417 @@ class VAMONoWd // Use these multiclasses to define instructions more easily. //===----------------------------------------------------------------------===// multiclass VALU_IV_V_X_I funct6, Operand optype = simm5, string vw = "v"> { - def V : VALUVV; - def X : VALUVX; - def I : VALUVI; + def V : VALUVV, + Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>; + def X : VALUVX, + Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>; + def I : VALUVI, + Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>; } multiclass VALU_IV_V_X funct6, string vw = "v"> { - def V : VALUVV; - def X : VALUVX; -} - -multiclass VALUr_IV_V_X funct6, string vw = "v"> { - def V : VALUrVV; - def X : VALUrVX; + def V : VALUVV, + Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>; + def X : VALUVX, + Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>; } multiclass VALU_IV_X_I funct6, Operand optype = simm5, string vw = "v"> { - def X : VALUVX; - def I : VALUVI; -} - -multiclass VALU_IV_V funct6> { - def _VS : VALUVV; -} - -multiclass VALUr_IV_X funct6, string vw = "v"> { - def X : VALUrVX; + def X : VALUVX, + Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUX, ReadVMask]>; + def I : VALUVI, + Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>; } multiclass VALU_MV_V_X funct6, string vw = "v"> { - def V : VALUVV; - def X : VALUVX; + def V : VALUVV, + Sched<[WriteVIWALUV, ReadVIWALUV, ReadVIWALUV, ReadVMask]>; + def X : VALUVX, + Sched<[WriteVIWALUX, ReadVIWALUV, ReadVIWALUX, ReadVMask]>; } -multiclass VALU_MV_V funct6> { - def _VS : VALUVV; +multiclass VMAC_MV_V_X funct6, string vw = "v"> { + def V : VALUrVV, + Sched<[WriteVIMulAddV, ReadVIMulAddV, ReadVIMulAddV, ReadVMask]>; + def X : VALUrVX, + Sched<[WriteVIMulAddX, ReadVIMulAddV, ReadVIMulAddX, ReadVMask]>; } -multiclass VALU_MV_Mask funct6, string vm = "v"> { - def M : VALUVVNoVm; +multiclass VWMAC_MV_V_X funct6, string vw = "v"> { + def V : VALUrVV, + Sched<[WriteVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddV, ReadVMask]>; + def X : VALUrVX, + Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>; } -multiclass VALU_MV_X funct6, string vw = "v"> { 
- def X : VALUVX; -} - -multiclass VALUr_MV_V_X funct6, string vw = "v"> { - def V : VALUrVV; - def X : VALUrVX; -} - -multiclass VALUr_MV_X funct6, string vw = "v"> { - def X : VALUrVX; +multiclass VWMAC_MV_X funct6, string vw = "v"> { + def X : VALUrVX, + Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>; } multiclass VALU_MV_VS2 funct6, bits<5> vs1> { - def "" : VALUVs2; + def "" : VALUVs2, + Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; } multiclass VALUm_IV_V_X_I funct6> { - def VM : VALUmVV; - def XM : VALUmVX; - def IM : VALUmVI; + def VM : VALUmVV, + Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>; + def XM : VALUmVX, + Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>; + def IM : VALUmVI, + Sched<[WriteVICALUI, ReadVIALUCV, ReadVMask]>; +} + +multiclass VMRG_IV_V_X_I funct6> { + def VM : VALUmVV, + Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>; + def XM : VALUmVX, + Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>; + def IM : VALUmVI, + Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>; } multiclass VALUm_IV_V_X funct6> { - def VM : VALUmVV; - def XM : VALUmVX; + def VM : VALUmVV, + Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>; + def XM : VALUmVX, + Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>; } multiclass VALUNoVm_IV_V_X_I funct6, Operand optype = simm5> { - def V : VALUVVNoVm; - def X : VALUVXNoVm; - def I : VALUVINoVm; + def V : VALUVVNoVm, + Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV]>; + def X : VALUVXNoVm, + Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX]>; + def I : VALUVINoVm, + Sched<[WriteVICALUI, ReadVIALUCV]>; } multiclass VALUNoVm_IV_V_X funct6> { - def V : VALUVVNoVm; - def X : VALUVXNoVm; + def V : VALUVVNoVm, + Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV]>; + def X : VALUVXNoVm, + Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX]>; } multiclass VALU_FV_V_F funct6, string vw = "v"> { - def V : VALUVV; - def F : VALUVF; + def V : VALUVV, + Sched<[WriteVFALUV, ReadVFALUV, ReadVFALUV, ReadVMask]>; + def F : VALUVF, + Sched<[WriteVFALUF, ReadVFALUV, ReadVFALUF, ReadVMask]>; } multiclass VALU_FV_F funct6, string vw = "v"> { - def F : VALUVF; + def F : VALUVF, + Sched<[WriteVFALUF, ReadVFALUV, ReadVFALUF, ReadVMask]>; } -multiclass VALUr_FV_V_F funct6, string vw = "v"> { - def V : VALUrVV; - def F : VALUrVF; +multiclass VWALU_FV_V_F funct6, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVFWALUV, ReadVFWALUV, ReadVFWALUV, ReadVMask]>; + def F : VALUVF, + Sched<[WriteVFWALUF, ReadVFWALUV, ReadVFWALUF, ReadVMask]>; } -multiclass VALU_FV_V funct6> { - def _VS : VALUVV; +multiclass VMUL_FV_V_F funct6, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVFMulV, ReadVFMulV, ReadVFMulV, ReadVMask]>; + def F : VALUVF, + Sched<[WriteVFMulF, ReadVFMulV, ReadVFMulF, ReadVMask]>; } -multiclass VALU_FV_VS2 funct6, bits<5> vs1> { - def "" : VALUVs2; +multiclass VDIV_FV_V_F funct6, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVFDivV, ReadVFDivV, ReadVFDivV, ReadVMask]>; + def F : VALUVF, + Sched<[WriteVFDivF, ReadVFDivV, ReadVFDivF, ReadVMask]>; +} + +multiclass VRDIV_FV_F funct6, string vw = "v"> { + def F : VALUVF, + Sched<[WriteVFDivF, ReadVFDivV, ReadVFDivF, ReadVMask]>; +} + +multiclass VWMUL_FV_V_F funct6, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVFWMulV, ReadVFWMulV, ReadVFWMulV, ReadVMask]>; + def F : VALUVF, + Sched<[WriteVFWMulF, ReadVFWMulV, ReadVFWMulF, ReadVMask]>; +} + +multiclass VMAC_FV_V_F funct6, string vw = "v"> { + def V : VALUrVV, + Sched<[WriteVFMulAddV, 
ReadVFMulAddV, ReadVFMulAddV, ReadVMask]>; + def F : VALUrVF, + Sched<[WriteVFMulAddF, ReadVFMulAddV, ReadVFMulAddF, ReadVMask]>; +} + +multiclass VWMAC_FV_V_F funct6, string vw = "v"> { + def V : VALUrVV, + Sched<[WriteVFWMulAddV, ReadVFWMulAddV, ReadVFWMulAddV, ReadVMask]>; + def F : VALUrVF, + Sched<[WriteVFWMulAddF, ReadVFWMulAddV, ReadVFWMulAddF, ReadVMask]>; +} + +multiclass VSQR_FV_VS2 funct6, bits<5> vs1> { + def "" : VALUVs2, + Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>; +} + +multiclass VRCP_FV_VS2 funct6, bits<5> vs1> { + def "" : VALUVs2, + Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>; +} + +multiclass VCMP_FV_V_F funct6, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVFCmpV, ReadVFCmpV, ReadVFCmpV, ReadVMask]>; + def F : VALUVF, + Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>; +} + +multiclass VCMP_FV_F funct6, string vw = "v"> { + def F : VALUVF, + Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>; +} + +multiclass VSGNJ_FV_V_F funct6, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVFSgnjV, ReadVFSgnjV, ReadVFSgnjV, ReadVMask]>; + def F : VALUVF, + Sched<[WriteVFSgnjF, ReadVFSgnjV, ReadVFSgnjF, ReadVMask]>; +} + +multiclass VCLS_FV_VS2 funct6, bits<5> vs1> { + def "" : VALUVs2, + Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>; +} + +multiclass VCVTF_IV_VS2 funct6, bits<5> vs1> { + def "" : VALUVs2, + Sched<[WriteVFCvtIToFV, ReadVFCvtIToFV, ReadVMask]>; +} + +multiclass VCVTI_FV_VS2 funct6, bits<5> vs1> { + def "" : VALUVs2, + Sched<[WriteVFCvtFToIV, ReadVFCvtFToIV, ReadVMask]>; +} + +multiclass VWCVTF_IV_VS2 funct6, bits<5> vs1> { + def "" : VALUVs2, + Sched<[WriteVFWCvtIToFV, ReadVFWCvtIToFV, ReadVMask]>; +} + +multiclass VWCVTI_FV_VS2 funct6, bits<5> vs1> { + def "" : VALUVs2, + Sched<[WriteVFWCvtFToIV, ReadVFWCvtFToIV, ReadVMask]>; +} + +multiclass VWCVTF_FV_VS2 funct6, bits<5> vs1> { + def "" : VALUVs2, + Sched<[WriteVFWCvtFToFV, ReadVFWCvtFToFV, ReadVMask]>; +} + +multiclass VNCVTF_IV_VS2 funct6, bits<5> vs1> { + def "" : VALUVs2, + Sched<[WriteVFNCvtIToFV, ReadVFNCvtIToFV, ReadVMask]>; +} + +multiclass VNCVTI_FV_VS2 funct6, bits<5> vs1> { + def "" : VALUVs2, + Sched<[WriteVFNCvtFToIV, ReadVFNCvtFToIV, ReadVMask]>; +} + +multiclass VNCVTF_FV_VS2 funct6, bits<5> vs1> { + def "" : VALUVs2, + Sched<[WriteVFNCvtFToFV, ReadVFNCvtFToFV, ReadVMask]>; +} + +multiclass VRED_MV_V funct6> { + def _VS : VALUVV, + Sched<[WriteVIRedV, ReadVIRedV, ReadVIRedV0, ReadVMask]>; +} + +multiclass VWRED_IV_V funct6> { + def _VS : VALUVV, + Sched<[WriteVIWRedV, ReadVIWRedV, ReadVIWRedV0, ReadVMask]>; +} + +multiclass VRED_FV_V funct6> { + def _VS : VALUVV, + Sched<[WriteVFRedV, ReadVFRedV, ReadVFRedV0, ReadVMask]>; +} + +multiclass VREDO_FV_V funct6> { + def _VS : VALUVV, + Sched<[WriteVFRedOV, ReadVFRedOV, ReadVFRedOV0, ReadVMask]>; +} + +multiclass VWRED_FV_V funct6> { + def _VS : VALUVV, + Sched<[WriteVFWRedV, ReadVFWRedV, ReadVFWRedV0, ReadVMask]>; +} + +multiclass VWREDO_FV_V funct6> { + def _VS : VALUVV, + Sched<[WriteVFWRedOV, ReadVFWRedOV, ReadVFWRedOV0, ReadVMask]>; +} + +multiclass VMALU_MV_Mask funct6, string vm = "v"> { + def M : VALUVVNoVm, + Sched<[WriteVMALUV, ReadVMALUV, ReadVMALUV]>; +} + +multiclass VMSFS_MV_V funct6, bits<5> vs1> { + def "" : VALUVs2, + Sched<[WriteVMSFSV, ReadVMSFSV, ReadVMask]>; +} + +multiclass VMIOT_MV_V funct6, bits<5> vs1> { + def "" : VALUVs2, + Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>; +} + +multiclass VSHT_IV_V_X_I funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVShiftV, ReadVShiftV, 
ReadVShiftV, ReadVMask]>; + def X : VALUVX, + Sched<[WriteVShiftX, ReadVShiftV, ReadVShiftX, ReadVMask]>; + def I : VALUVI, + Sched<[WriteVShiftI, ReadVShiftV, ReadVMask]>; +} + +multiclass VNSHT_IV_V_X_I funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVNShiftV, ReadVNShiftV, ReadVNShiftV, ReadVMask]>; + def X : VALUVX, + Sched<[WriteVNShiftX, ReadVNShiftV, ReadVNShiftX, ReadVMask]>; + def I : VALUVI, + Sched<[WriteVNShiftI, ReadVNShiftV, ReadVMask]>; +} + +multiclass VCMP_IV_V_X_I funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>; + def X : VALUVX, + Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; + def I : VALUVI, + Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>; +} + +multiclass VCMP_IV_X_I funct6, Operand optype = simm5, string vw = "v"> { + def X : VALUVX, + Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; + def I : VALUVI, + Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>; +} + +multiclass VCMP_IV_V_X funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>; + def X : VALUVX, + Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; +} + +multiclass VMUL_MV_V_X funct6, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVIMulV, ReadVIMulV, ReadVIMulV, ReadVMask]>; + def X : VALUVX, + Sched<[WriteVIMulX, ReadVIMulV, ReadVIMulX, ReadVMask]>; +} + +multiclass VWMUL_MV_V_X funct6, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVIWMulV, ReadVIWMulV, ReadVIWMulV, ReadVMask]>; + def X : VALUVX, + Sched<[WriteVIWMulX, ReadVIWMulV, ReadVIWMulX, ReadVMask]>; +} + +multiclass VDIV_MV_V_X funct6, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVIDivV, ReadVIDivV, ReadVIDivV, ReadVMask]>; + def X : VALUVX, + Sched<[WriteVIDivX, ReadVIDivV, ReadVIDivX, ReadVMask]>; +} + +multiclass VSALU_IV_V_X_I funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVSALUV, ReadVSALUV, ReadVSALUV, ReadVMask]>; + def X : VALUVX, + Sched<[WriteVSALUX, ReadVSALUV, ReadVSALUX, ReadVMask]>; + def I : VALUVI, + Sched<[WriteVSALUI, ReadVSALUV, ReadVMask]>; +} + +multiclass VSALU_IV_V_X funct6, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVSALUV, ReadVSALUV, ReadVSALUV, ReadVMask]>; + def X : VALUVX, + Sched<[WriteVSALUX, ReadVSALUV, ReadVSALUX, ReadVMask]>; +} + +multiclass VAALU_MV_V_X funct6, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVAALUV, ReadVAALUV, ReadVAALUV, ReadVMask]>; + def X : VALUVX, + Sched<[WriteVAALUX, ReadVAALUV, ReadVAALUX, ReadVMask]>; +} + +multiclass VSMUL_IV_V_X funct6, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVSMulV, ReadVSMulV, ReadVSMulV, ReadVMask]>; + def X : VALUVX, + Sched<[WriteVSMulX, ReadVSMulV, ReadVSMulX, ReadVMask]>; +} + +multiclass VSSHF_IV_V_X_I funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVSShiftV, ReadVSShiftV, ReadVSShiftV, ReadVMask]>; + def X : VALUVX, + Sched<[WriteVSShiftX, ReadVSShiftV, ReadVSShiftX, ReadVMask]>; + def I : VALUVI, + Sched<[WriteVSShiftI, ReadVSShiftV, ReadVMask]>; +} + +multiclass VNCLP_IV_V_X_I funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVNClipV, ReadVNClipV, ReadVNClipV, ReadVMask]>; + def X : VALUVX, + Sched<[WriteVNClipX, ReadVNClipV, ReadVNClipX, ReadVMask]>; + def I : VALUVI, + Sched<[WriteVNClipI, ReadVNClipV, ReadVMask]>; +} + +multiclass VSLD_IV_X_I funct6, Operand optype = simm5, string vw = "v"> { + def X : VALUVX, + 
Sched<[WriteVISlideX, ReadVISlideV, ReadVISlideX, ReadVMask]>; + def I : VALUVI, + Sched<[WriteVISlideI, ReadVISlideV, ReadVMask]>; +} + +multiclass VSLD1_MV_X funct6, string vw = "v"> { + def X : VALUVX, + Sched<[WriteVISlide1X, ReadVISlideV, ReadVISlideX, ReadVMask]>; +} + +multiclass VSLD1_FV_F funct6, string vw = "v"> { + def F : VALUVF, + Sched<[WriteVFSlide1F, ReadVFSlideV, ReadVFSlideF, ReadVMask]>; +} + +multiclass VGTR_IV_V_X_I funct6, Operand optype = simm5, string vw = "v"> { + def V : VALUVV, + Sched<[WriteVGatherV, ReadVGatherV, ReadVGatherV, ReadVMask]>; + def X : VALUVX, + Sched<[WriteVGatherX, ReadVGatherV, ReadVGatherX, ReadVMask]>; + def I : VALUVI, + Sched<[WriteVGatherI, ReadVGatherV, ReadVMask]>; +} + +multiclass VCPR_MV_Mask funct6, string vm = "v"> { + def M : VALUVVNoVm, + Sched<[WriteVCompressV, ReadVCompressV, ReadVCompressV]>; } multiclass VAMO { @@ -435,11 +779,48 @@ multiclass VAMO { def _UNWD : VAMONoWd; } -multiclass VWholeLoad nf, string opcodestr, RegisterClass VRC> { - def E8_V : VWholeLoad; - def E16_V : VWholeLoad; - def E32_V : VWholeLoad; - def E64_V : VWholeLoad; +multiclass VWholeLoad1 { + def E8_V : VWholeLoad<0, LSWidth8, opcodestr # "e8.v", VRC>, + Sched<[WriteVLD1R8, ReadVLDX]>; + def E16_V : VWholeLoad<0, LSWidth16, opcodestr # "e16.v", VRC>, + Sched<[WriteVLD1R16, ReadVLDX]>; + def E32_V : VWholeLoad<0, LSWidth32, opcodestr # "e32.v", VRC>, + Sched<[WriteVLD1R32, ReadVLDX]>; + def E64_V : VWholeLoad<0, LSWidth64, opcodestr # "e64.v", VRC>, + Sched<[WriteVLD1R64, ReadVLDX]>; +} + +multiclass VWholeLoad2 { + def E8_V : VWholeLoad<1, LSWidth8, opcodestr # "e8.v", VRC>, + Sched<[WriteVLD2R8, ReadVLDX]>; + def E16_V : VWholeLoad<1, LSWidth16, opcodestr # "e16.v", VRC>, + Sched<[WriteVLD2R16, ReadVLDX]>; + def E32_V : VWholeLoad<1, LSWidth32, opcodestr # "e32.v", VRC>, + Sched<[WriteVLD2R32, ReadVLDX]>; + def E64_V : VWholeLoad<1, LSWidth64, opcodestr # "e64.v", VRC>, + Sched<[WriteVLD2R64, ReadVLDX]>; +} + +multiclass VWholeLoad4 { + def E8_V : VWholeLoad<3, LSWidth8, opcodestr # "e8.v", VRC>, + Sched<[WriteVLD4R8, ReadVLDX]>; + def E16_V : VWholeLoad<3, LSWidth16, opcodestr # "e16.v", VRC>, + Sched<[WriteVLD4R16, ReadVLDX]>; + def E32_V : VWholeLoad<3, LSWidth32, opcodestr # "e32.v", VRC>, + Sched<[WriteVLD4R32, ReadVLDX]>; + def E64_V : VWholeLoad<3, LSWidth64, opcodestr # "e64.v", VRC>, + Sched<[WriteVLD4R64, ReadVLDX]>; +} + +multiclass VWholeLoad8 { + def E8_V : VWholeLoad<7, LSWidth8, opcodestr # "e8.v", VRC>, + Sched<[WriteVLD8R8, ReadVLDX]>; + def E16_V : VWholeLoad<7, LSWidth16, opcodestr # "e16.v", VRC>, + Sched<[WriteVLD8R16, ReadVLDX]>; + def E32_V : VWholeLoad<7, LSWidth32, opcodestr # "e32.v", VRC>, + Sched<[WriteVLD8R32, ReadVLDX]>; + def E64_V : VWholeLoad<7, LSWidth64, opcodestr # "e64.v", VRC>, + Sched<[WriteVLD8R64, ReadVLDX]>; } //===----------------------------------------------------------------------===// @@ -459,69 +840,94 @@ def VSETVL : RVInstSetVL<(outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2), } // hasSideEffects = 1, mayLoad = 0, mayStore = 0 // Vector Unit-Stride Instructions -def VLE8_V : VUnitStrideLoad; -def VLE16_V : VUnitStrideLoad; -def VLE32_V : VUnitStrideLoad; -def VLE64_V : VUnitStrideLoad; +def VLE8_V : VUnitStrideLoad, + VLESched<8>; +def VLE16_V : VUnitStrideLoad, + VLESched<16>; +def VLE32_V : VUnitStrideLoad, + VLESched<32>; +def VLE64_V : VUnitStrideLoad, + VLESched<64>; -def VLE8FF_V : VUnitStrideLoad; -def VLE16FF_V : VUnitStrideLoad; -def VLE32FF_V : VUnitStrideLoad; -def VLE64FF_V : VUnitStrideLoad; +def 
VLE8FF_V : VUnitStrideLoad, + VLFSched<8>; +def VLE16FF_V : VUnitStrideLoad, + VLFSched<16>; +def VLE32FF_V : VUnitStrideLoad, + VLFSched<32>; +def VLE64FF_V : VUnitStrideLoad, + VLFSched<64>; -def VLE1_V : VUnitStrideLoadMask<"vle1.v">; -def VSE1_V : VUnitStrideStoreMask<"vse1.v">; +def VLE1_V : VUnitStrideLoadMask<"vle1.v">, + Sched<[WriteVLDM, ReadVLDX]>; +def VSE1_V : VUnitStrideStoreMask<"vse1.v">, + Sched<[WriteVSTM, ReadVSTM, ReadVSTX]>; -def VSE8_V : VUnitStrideStore; -def VSE16_V : VUnitStrideStore; -def VSE32_V : VUnitStrideStore; -def VSE64_V : VUnitStrideStore; +def VSE8_V : VUnitStrideStore, + VSESched<8>; +def VSE16_V : VUnitStrideStore, + VSESched<16>; +def VSE32_V : VUnitStrideStore, + VSESched<32>; +def VSE64_V : VUnitStrideStore, + VSESched<64>; // Vector Strided Instructions -def VLSE8_V : VStridedLoad; -def VLSE16_V : VStridedLoad; -def VLSE32_V : VStridedLoad; -def VLSE64_V : VStridedLoad; +def VLSE8_V : VStridedLoad, + VLSSched<8>; +def VLSE16_V : VStridedLoad, + VLSSched<16>; +def VLSE32_V : VStridedLoad, + VLSSched<32>; +def VLSE64_V : VStridedLoad, + VLSSched<64>; -def VSSE8_V : VStridedStore; -def VSSE16_V : VStridedStore; -def VSSE32_V : VStridedStore; -def VSSE64_V : VStridedStore; +def VSSE8_V : VStridedStore, + VSSSched<8>; +def VSSE16_V : VStridedStore, + VSSSched<16>; +def VSSE32_V : VStridedStore, + VSSSched<32>; +def VSSE64_V : VStridedStore, + VSSSched<64>; // Vector Indexed Instructions -def VLUXEI8_V : VIndexedLoad; -def VLUXEI16_V : VIndexedLoad; -def VLUXEI32_V : VIndexedLoad; -def VLUXEI64_V : VIndexedLoad; +foreach n = [8, 16, 32, 64] in { +defvar w = !cast("LSWidth" # n); -def VLOXEI8_V : VIndexedLoad; -def VLOXEI16_V : VIndexedLoad; -def VLOXEI32_V : VIndexedLoad; -def VLOXEI64_V : VIndexedLoad; +def VLUXEI # n # _V : + VIndexedLoad, + VLXSched; +def VLOXEI # n # _V : + VIndexedLoad, + VLXSched; -def VSUXEI8_V : VIndexedStore; -def VSUXEI16_V : VIndexedStore; -def VSUXEI32_V : VIndexedStore; -def VSUXEI64_V : VIndexedStore; +def VSUXEI # n # _V : + VIndexedStore, + VSXSched; +def VSOXEI # n # _V : + VIndexedStore, + VSXSched; +} -def VSOXEI8_V : VIndexedStore; -def VSOXEI16_V : VIndexedStore; -def VSOXEI32_V : VIndexedStore; -def VSOXEI64_V : VIndexedStore; +defm VL1R : VWholeLoad1<"vl1r", VR>; +defm VL2R : VWholeLoad2<"vl2r", VRM2>; +defm VL4R : VWholeLoad4<"vl4r", VRM4>; +defm VL8R : VWholeLoad8<"vl8r", VRM8>; -defm VL1R : VWholeLoad<0, "vl1r", VR>; -defm VL2R : VWholeLoad<1, "vl2r", VRM2>; -defm VL4R : VWholeLoad<3, "vl4r", VRM4>; -defm VL8R : VWholeLoad<7, "vl8r", VRM8>; def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>; def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V VRM2:$vd, GPR:$rs1)>; def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VRM4:$vd, GPR:$rs1)>; def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VRM8:$vd, GPR:$rs1)>; -def VS1R_V : VWholeStore<0, "vs1r.v", VR>; -def VS2R_V : VWholeStore<1, "vs2r.v", VRM2>; -def VS4R_V : VWholeStore<3, "vs4r.v", VRM4>; -def VS8R_V : VWholeStore<7, "vs8r.v", VRM8>; +def VS1R_V : VWholeStore<0, "vs1r.v", VR>, + Sched<[WriteVST1R, ReadVST1R, ReadVSTX]>; +def VS2R_V : VWholeStore<1, "vs2r.v", VRM2>, + Sched<[WriteVST2R, ReadVST2R, ReadVSTX]>; +def VS4R_V : VWholeStore<3, "vs4r.v", VRM4>, + Sched<[WriteVST4R, ReadVST4R, ReadVSTX]>; +def VS8R_V : VWholeStore<7, "vs8r.v", VRM8>, + Sched<[WriteVST8R, ReadVST8R, ReadVSTX]>; // Vector Single-Width Integer Add and Subtract defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>; @@ -588,9 +994,9 @@ def : InstAlias<"vnot.v $vd, $vs$vm", (VXOR_VI 
VR:$vd, VR:$vs, -1, VMaskOp:$vm)>; // Vector Single-Width Bit Shift Instructions -defm VSLL_V : VALU_IV_V_X_I<"vsll", 0b100101, uimm5>; -defm VSRL_V : VALU_IV_V_X_I<"vsrl", 0b101000, uimm5>; -defm VSRA_V : VALU_IV_V_X_I<"vsra", 0b101001, uimm5>; +defm VSLL_V : VSHT_IV_V_X_I<"vsll", 0b100101, uimm5>; +defm VSRL_V : VSHT_IV_V_X_I<"vsrl", 0b101000, uimm5>; +defm VSRA_V : VSHT_IV_V_X_I<"vsra", 0b101001, uimm5>; // Vector Narrowing Integer Right Shift Instructions // Refer to 11.3. Narrowing Vector Arithmetic Instructions @@ -598,8 +1004,8 @@ defm VSRA_V : VALU_IV_V_X_I<"vsra", 0b101001, uimm5>; // vector register group (specified by vs2). The destination vector register // group cannot overlap the mask register if used, unless LMUL=1. let Constraints = "@earlyclobber $vd" in { -defm VNSRL_W : VALU_IV_V_X_I<"vnsrl", 0b101100, uimm5, "w">; -defm VNSRA_W : VALU_IV_V_X_I<"vnsra", 0b101101, uimm5, "w">; +defm VNSRL_W : VNSHT_IV_V_X_I<"vnsrl", 0b101100, uimm5, "w">; +defm VNSRA_W : VNSHT_IV_V_X_I<"vnsra", 0b101101, uimm5, "w">; } // Constraints = "@earlyclobber $vd" def : InstAlias<"vncvt.x.x.w $vd, $vs$vm", @@ -607,14 +1013,14 @@ def : InstAlias<"vncvt.x.x.w $vd, $vs$vm", // Vector Integer Comparison Instructions let RVVConstraint = NoConstraint in { -defm VMSEQ_V : VALU_IV_V_X_I<"vmseq", 0b011000>; -defm VMSNE_V : VALU_IV_V_X_I<"vmsne", 0b011001>; -defm VMSLTU_V : VALU_IV_V_X<"vmsltu", 0b011010>; -defm VMSLT_V : VALU_IV_V_X<"vmslt", 0b011011>; -defm VMSLEU_V : VALU_IV_V_X_I<"vmsleu", 0b011100>; -defm VMSLE_V : VALU_IV_V_X_I<"vmsle", 0b011101>; -defm VMSGTU_V : VALU_IV_X_I<"vmsgtu", 0b011110>; -defm VMSGT_V : VALU_IV_X_I<"vmsgt", 0b011111>; +defm VMSEQ_V : VCMP_IV_V_X_I<"vmseq", 0b011000>; +defm VMSNE_V : VCMP_IV_V_X_I<"vmsne", 0b011001>; +defm VMSLTU_V : VCMP_IV_V_X<"vmsltu", 0b011010>; +defm VMSLT_V : VCMP_IV_V_X<"vmslt", 0b011011>; +defm VMSLEU_V : VCMP_IV_V_X_I<"vmsleu", 0b011100>; +defm VMSLE_V : VCMP_IV_V_X_I<"vmsle", 0b011101>; +defm VMSGTU_V : VCMP_IV_X_I<"vmsgtu", 0b011110>; +defm VMSGT_V : VCMP_IV_X_I<"vmsgt", 0b011111>; } // RVVConstraint = NoConstraint def : InstAlias<"vmsgtu.vv $vd, $va, $vb$vm", @@ -672,84 +1078,87 @@ def PseudoVMSGE_VX_M_T : Pseudo<(outs VR:$vd, VRNoV0:$scratch), } // Vector Integer Min/Max Instructions -defm VMINU_V : VALU_IV_V_X<"vminu", 0b000100>; -defm VMIN_V : VALU_IV_V_X<"vmin", 0b000101>; -defm VMAXU_V : VALU_IV_V_X<"vmaxu", 0b000110>; -defm VMAX_V : VALU_IV_V_X<"vmax", 0b000111>; +defm VMINU_V : VCMP_IV_V_X<"vminu", 0b000100>; +defm VMIN_V : VCMP_IV_V_X<"vmin", 0b000101>; +defm VMAXU_V : VCMP_IV_V_X<"vmaxu", 0b000110>; +defm VMAX_V : VCMP_IV_V_X<"vmax", 0b000111>; // Vector Single-Width Integer Multiply Instructions -defm VMUL_V : VALU_MV_V_X<"vmul", 0b100101>; -defm VMULH_V : VALU_MV_V_X<"vmulh", 0b100111>; -defm VMULHU_V : VALU_MV_V_X<"vmulhu", 0b100100>; -defm VMULHSU_V : VALU_MV_V_X<"vmulhsu", 0b100110>; +defm VMUL_V : VMUL_MV_V_X<"vmul", 0b100101>; +defm VMULH_V : VMUL_MV_V_X<"vmulh", 0b100111>; +defm VMULHU_V : VMUL_MV_V_X<"vmulhu", 0b100100>; +defm VMULHSU_V : VMUL_MV_V_X<"vmulhsu", 0b100110>; // Vector Integer Divide Instructions -defm VDIVU_V : VALU_MV_V_X<"vdivu", 0b100000>; -defm VDIV_V : VALU_MV_V_X<"vdiv", 0b100001>; -defm VREMU_V : VALU_MV_V_X<"vremu", 0b100010>; -defm VREM_V : VALU_MV_V_X<"vrem", 0b100011>; +defm VDIVU_V : VDIV_MV_V_X<"vdivu", 0b100000>; +defm VDIV_V : VDIV_MV_V_X<"vdiv", 0b100001>; +defm VREMU_V : VDIV_MV_V_X<"vremu", 0b100010>; +defm VREM_V : VDIV_MV_V_X<"vrem", 0b100011>; // Vector Widening Integer Multiply Instructions let 
Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in { -defm VWMUL_V : VALU_MV_V_X<"vwmul", 0b111011>; -defm VWMULU_V : VALU_MV_V_X<"vwmulu", 0b111000>; -defm VWMULSU_V : VALU_MV_V_X<"vwmulsu", 0b111010>; +defm VWMUL_V : VWMUL_MV_V_X<"vwmul", 0b111011>; +defm VWMULU_V : VWMUL_MV_V_X<"vwmulu", 0b111000>; +defm VWMULSU_V : VWMUL_MV_V_X<"vwmulsu", 0b111010>; } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV // Vector Single-Width Integer Multiply-Add Instructions -defm VMACC_V : VALUr_MV_V_X<"vmacc", 0b101101>; -defm VNMSAC_V : VALUr_MV_V_X<"vnmsac", 0b101111>; -defm VMADD_V : VALUr_MV_V_X<"vmadd", 0b101001>; -defm VNMSUB_V : VALUr_MV_V_X<"vnmsub", 0b101011>; +defm VMACC_V : VMAC_MV_V_X<"vmacc", 0b101101>; +defm VNMSAC_V : VMAC_MV_V_X<"vnmsac", 0b101111>; +defm VMADD_V : VMAC_MV_V_X<"vmadd", 0b101001>; +defm VNMSUB_V : VMAC_MV_V_X<"vnmsub", 0b101011>; // Vector Widening Integer Multiply-Add Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in { -defm VWMACCU_V : VALUr_MV_V_X<"vwmaccu", 0b111100>; -defm VWMACC_V : VALUr_MV_V_X<"vwmacc", 0b111101>; -defm VWMACCSU_V : VALUr_MV_V_X<"vwmaccsu", 0b111111>; -defm VWMACCUS_V : VALUr_MV_X<"vwmaccus", 0b111110>; +defm VWMACCU_V : VWMAC_MV_V_X<"vwmaccu", 0b111100>; +defm VWMACC_V : VWMAC_MV_V_X<"vwmacc", 0b111101>; +defm VWMACCSU_V : VWMAC_MV_V_X<"vwmaccsu", 0b111111>; +defm VWMACCUS_V : VWMAC_MV_X<"vwmaccus", 0b111110>; } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV // Vector Integer Merge Instructions -defm VMERGE_V : VALUm_IV_V_X_I<"vmerge", 0b010111>; +defm VMERGE_V : VMRG_IV_V_X_I<"vmerge", 0b010111>; // Vector Integer Move Instructions let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vs2 = 0, vm = 1, RVVConstraint = NoConstraint in { // op vd, vs1 def VMV_V_V : RVInstVV<0b010111, OPIVV, (outs VR:$vd), - (ins VR:$vs1), "vmv.v.v", "$vd, $vs1">; + (ins VR:$vs1), "vmv.v.v", "$vd, $vs1">, + Sched<[WriteVIMovV, ReadVIMovV]>; // op vd, rs1 def VMV_V_X : RVInstVX<0b010111, OPIVX, (outs VR:$vd), - (ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">; + (ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">, + Sched<[WriteVIMovX, ReadVIMovX]>; // op vd, imm def VMV_V_I : RVInstIVI<0b010111, (outs VR:$vd), - (ins simm5:$imm), "vmv.v.i", "$vd, $imm">; + (ins simm5:$imm), "vmv.v.i", "$vd, $imm">, + Sched<[WriteVIMovI]>; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 // Vector Fixed-Point Arithmetic Instructions -defm VSADDU_V : VALU_IV_V_X_I<"vsaddu", 0b100000>; -defm VSADD_V : VALU_IV_V_X_I<"vsadd", 0b100001>; -defm VSSUBU_V : VALU_IV_V_X<"vssubu", 0b100010>; -defm VSSUB_V : VALU_IV_V_X<"vssub", 0b100011>; +defm VSADDU_V : VSALU_IV_V_X_I<"vsaddu", 0b100000>; +defm VSADD_V : VSALU_IV_V_X_I<"vsadd", 0b100001>; +defm VSSUBU_V : VSALU_IV_V_X<"vssubu", 0b100010>; +defm VSSUB_V : VSALU_IV_V_X<"vssub", 0b100011>; // Vector Single-Width Averaging Add and Subtract -defm VAADDU_V : VALU_MV_V_X<"vaaddu", 0b001000>; -defm VAADD_V : VALU_MV_V_X<"vaadd", 0b001001>; -defm VASUBU_V : VALU_MV_V_X<"vasubu", 0b001010>; -defm VASUB_V : VALU_MV_V_X<"vasub", 0b001011>; +defm VAADDU_V : VAALU_MV_V_X<"vaaddu", 0b001000>; +defm VAADD_V : VAALU_MV_V_X<"vaadd", 0b001001>; +defm VASUBU_V : VAALU_MV_V_X<"vasubu", 0b001010>; +defm VASUB_V : VAALU_MV_V_X<"vasub", 0b001011>; // Vector Single-Width Fractional Multiply with Rounding and Saturation -defm VSMUL_V : VALU_IV_V_X<"vsmul", 0b100111>; +defm VSMUL_V : VSMUL_IV_V_X<"vsmul", 0b100111>; // Vector Single-Width Scaling Shift Instructions -defm VSSRL_V : VALU_IV_V_X_I<"vssrl", 0b101010, uimm5>; -defm 
VSSRA_V : VALU_IV_V_X_I<"vssra", 0b101011, uimm5>; +defm VSSRL_V : VSSHF_IV_V_X_I<"vssrl", 0b101010, uimm5>; +defm VSSRA_V : VSSHF_IV_V_X_I<"vssra", 0b101011, uimm5>; // Vector Narrowing Fixed-Point Clip Instructions let Constraints = "@earlyclobber $vd" in { -defm VNCLIPU_W : VALU_IV_V_X_I<"vnclipu", 0b101110, uimm5, "w">; -defm VNCLIP_W : VALU_IV_V_X_I<"vnclip", 0b101111, uimm5, "w">; +defm VNCLIPU_W : VNCLP_IV_V_X_I<"vnclipu", 0b101110, uimm5, "w">; +defm VNCLIP_W : VNCLP_IV_V_X_I<"vnclip", 0b101111, uimm5, "w">; } // Constraints = "@earlyclobber $vd" } // Predicates = [HasStdExtV] @@ -762,60 +1171,60 @@ defm VFRSUB_V : VALU_FV_F<"vfrsub", 0b100111>; // Vector Widening Floating-Point Add/Subtract Instructions let Constraints = "@earlyclobber $vd" in { let RVVConstraint = WidenV in { -defm VFWADD_V : VALU_FV_V_F<"vfwadd", 0b110000>; -defm VFWSUB_V : VALU_FV_V_F<"vfwsub", 0b110010>; +defm VFWADD_V : VWALU_FV_V_F<"vfwadd", 0b110000>; +defm VFWSUB_V : VWALU_FV_V_F<"vfwsub", 0b110010>; } // RVVConstraint = WidenV // Set earlyclobber for following instructions for second and mask operands. // This has the downside that the earlyclobber constraint is too coarse and // will impose unnecessary restrictions by not allowing the destination to // overlap with the first (wide) operand. let RVVConstraint = WidenW in { -defm VFWADD_W : VALU_FV_V_F<"vfwadd", 0b110100, "w">; -defm VFWSUB_W : VALU_FV_V_F<"vfwsub", 0b110110, "w">; +defm VFWADD_W : VWALU_FV_V_F<"vfwadd", 0b110100, "w">; +defm VFWSUB_W : VWALU_FV_V_F<"vfwsub", 0b110110, "w">; } // RVVConstraint = WidenW } // Constraints = "@earlyclobber $vd" // Vector Single-Width Floating-Point Multiply/Divide Instructions -defm VFMUL_V : VALU_FV_V_F<"vfmul", 0b100100>; -defm VFDIV_V : VALU_FV_V_F<"vfdiv", 0b100000>; -defm VFRDIV_V : VALU_FV_F<"vfrdiv", 0b100001>; +defm VFMUL_V : VMUL_FV_V_F<"vfmul", 0b100100>; +defm VFDIV_V : VDIV_FV_V_F<"vfdiv", 0b100000>; +defm VFRDIV_V : VRDIV_FV_F<"vfrdiv", 0b100001>; // Vector Widening Floating-Point Multiply let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in { -defm VFWMUL_V : VALU_FV_V_F<"vfwmul", 0b111000>; +defm VFWMUL_V : VWMUL_FV_V_F<"vfwmul", 0b111000>; } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV // Vector Single-Width Floating-Point Fused Multiply-Add Instructions -defm VFMACC_V : VALUr_FV_V_F<"vfmacc", 0b101100>; -defm VFNMACC_V : VALUr_FV_V_F<"vfnmacc", 0b101101>; -defm VFMSAC_V : VALUr_FV_V_F<"vfmsac", 0b101110>; -defm VFNMSAC_V : VALUr_FV_V_F<"vfnmsac", 0b101111>; -defm VFMADD_V : VALUr_FV_V_F<"vfmadd", 0b101000>; -defm VFNMADD_V : VALUr_FV_V_F<"vfnmadd", 0b101001>; -defm VFMSUB_V : VALUr_FV_V_F<"vfmsub", 0b101010>; -defm VFNMSUB_V : VALUr_FV_V_F<"vfnmsub", 0b101011>; +defm VFMACC_V : VMAC_FV_V_F<"vfmacc", 0b101100>; +defm VFNMACC_V : VMAC_FV_V_F<"vfnmacc", 0b101101>; +defm VFMSAC_V : VMAC_FV_V_F<"vfmsac", 0b101110>; +defm VFNMSAC_V : VMAC_FV_V_F<"vfnmsac", 0b101111>; +defm VFMADD_V : VMAC_FV_V_F<"vfmadd", 0b101000>; +defm VFNMADD_V : VMAC_FV_V_F<"vfnmadd", 0b101001>; +defm VFMSUB_V : VMAC_FV_V_F<"vfmsub", 0b101010>; +defm VFNMSUB_V : VMAC_FV_V_F<"vfnmsub", 0b101011>; // Vector Widening Floating-Point Fused Multiply-Add Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in { -defm VFWMACC_V : VALUr_FV_V_F<"vfwmacc", 0b111100>; -defm VFWNMACC_V : VALUr_FV_V_F<"vfwnmacc", 0b111101>; -defm VFWMSAC_V : VALUr_FV_V_F<"vfwmsac", 0b111110>; -defm VFWNMSAC_V : VALUr_FV_V_F<"vfwnmsac", 0b111111>; +defm VFWMACC_V : VWMAC_FV_V_F<"vfwmacc", 0b111100>; +defm 
VFWNMACC_V : VWMAC_FV_V_F<"vfwnmacc", 0b111101>; +defm VFWMSAC_V : VWMAC_FV_V_F<"vfwmsac", 0b111110>; +defm VFWNMSAC_V : VWMAC_FV_V_F<"vfwnmsac", 0b111111>; } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV // Vector Floating-Point Square-Root Instruction -defm VFSQRT_V : VALU_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>; -defm VFRSQRT7_V : VALU_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>; -defm VFREC7_V : VALU_FV_VS2<"vfrec7.v", 0b010011, 0b00101>; +defm VFSQRT_V : VSQR_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>; +defm VFRSQRT7_V : VRCP_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>; +defm VFREC7_V : VRCP_FV_VS2<"vfrec7.v", 0b010011, 0b00101>; // Vector Floating-Point MIN/MAX Instructions -defm VFMIN_V : VALU_FV_V_F<"vfmin", 0b000100>; -defm VFMAX_V : VALU_FV_V_F<"vfmax", 0b000110>; +defm VFMIN_V : VCMP_FV_V_F<"vfmin", 0b000100>; +defm VFMAX_V : VCMP_FV_V_F<"vfmax", 0b000110>; // Vector Floating-Point Sign-Injection Instructions -defm VFSGNJ_V : VALU_FV_V_F<"vfsgnj", 0b001000>; -defm VFSGNJN_V : VALU_FV_V_F<"vfsgnjn", 0b001001>; -defm VFSGNJX_V : VALU_FV_V_F<"vfsgnjx", 0b001010>; +defm VFSGNJ_V : VSGNJ_FV_V_F<"vfsgnj", 0b001000>; +defm VFSGNJN_V : VSGNJ_FV_V_F<"vfsgnjn", 0b001001>; +defm VFSGNJX_V : VSGNJ_FV_V_F<"vfsgnjx", 0b001010>; def : InstAlias<"vfneg.v $vd, $vs$vm", (VFSGNJN_VV VR:$vd, VR:$vs, VR:$vs, VMaskOp:$vm)>; @@ -824,12 +1233,12 @@ def : InstAlias<"vfabs.v $vd, $vs$vm", // Vector Floating-Point Compare Instructions let RVVConstraint = NoConstraint in { -defm VMFEQ_V : VALU_FV_V_F<"vmfeq", 0b011000>; -defm VMFNE_V : VALU_FV_V_F<"vmfne", 0b011100>; -defm VMFLT_V : VALU_FV_V_F<"vmflt", 0b011011>; -defm VMFLE_V : VALU_FV_V_F<"vmfle", 0b011001>; -defm VMFGT_V : VALU_FV_F<"vmfgt", 0b011101>; -defm VMFGE_V : VALU_FV_F<"vmfge", 0b011111>; +defm VMFEQ_V : VCMP_FV_V_F<"vmfeq", 0b011000>; +defm VMFNE_V : VCMP_FV_V_F<"vmfne", 0b011100>; +defm VMFLT_V : VCMP_FV_V_F<"vmflt", 0b011011>; +defm VMFLE_V : VCMP_FV_V_F<"vmfle", 0b011001>; +defm VMFGT_V : VCMP_FV_F<"vmfgt", 0b011101>; +defm VMFGE_V : VCMP_FV_F<"vmfge", 0b011111>; } // RVVConstraint = NoConstraint def : InstAlias<"vmfgt.vv $vd, $va, $vb$vm", @@ -838,68 +1247,70 @@ def : InstAlias<"vmfge.vv $vd, $va, $vb$vm", (VMFLE_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>; // Vector Floating-Point Classify Instruction -defm VFCLASS_V : VALU_FV_VS2<"vfclass.v", 0b010011, 0b10000>; +defm VFCLASS_V : VCLS_FV_VS2<"vfclass.v", 0b010011, 0b10000>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { + // Vector Floating-Point Merge Instruction +let vm = 0 in def VFMERGE_VFM : RVInstVX<0b010111, OPFVF, (outs VR:$vd), (ins VR:$vs2, FPR32:$rs1, VMV0:$v0), - "vfmerge.vfm", "$vd, $vs2, $rs1, v0"> { - let vm = 0; -} + "vfmerge.vfm", "$vd, $vs2, $rs1, v0">, + Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>; // Vector Floating-Point Move Instruction let RVVConstraint = NoConstraint in +let vm = 1, vs2 = 0 in def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VR:$vd), - (ins FPR32:$rs1), "vfmv.v.f", "$vd, $rs1"> { - let vs2 = 0; - let vm = 1; -} + (ins FPR32:$rs1), "vfmv.v.f", "$vd, $rs1">, + Sched<[WriteVFMovV, ReadVFMovF]>; + } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 // Single-Width Floating-Point/Integer Type-Convert Instructions -defm VFCVT_XU_F_V : VALU_FV_VS2<"vfcvt.xu.f.v", 0b010010, 0b00000>; -defm VFCVT_X_F_V : VALU_FV_VS2<"vfcvt.x.f.v", 0b010010, 0b00001>; -defm VFCVT_RTZ_XU_F_V : VALU_FV_VS2<"vfcvt.rtz.xu.f.v", 0b010010, 0b00110>; -defm VFCVT_RTZ_X_F_V : VALU_FV_VS2<"vfcvt.rtz.x.f.v", 0b010010, 0b00111>; -defm VFCVT_F_XU_V : 
VALU_FV_VS2<"vfcvt.f.xu.v", 0b010010, 0b00010>; -defm VFCVT_F_X_V : VALU_FV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>; +defm VFCVT_XU_F_V : VCVTI_FV_VS2<"vfcvt.xu.f.v", 0b010010, 0b00000>; +defm VFCVT_X_F_V : VCVTI_FV_VS2<"vfcvt.x.f.v", 0b010010, 0b00001>; +defm VFCVT_RTZ_XU_F_V : VCVTI_FV_VS2<"vfcvt.rtz.xu.f.v", 0b010010, 0b00110>; +defm VFCVT_RTZ_X_F_V : VCVTI_FV_VS2<"vfcvt.rtz.x.f.v", 0b010010, 0b00111>; +defm VFCVT_F_XU_V : VCVTF_IV_VS2<"vfcvt.f.xu.v", 0b010010, 0b00010>; +defm VFCVT_F_X_V : VCVTF_IV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>; // Widening Floating-Point/Integer Type-Convert Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt in { -defm VFWCVT_XU_F_V : VALU_FV_VS2<"vfwcvt.xu.f.v", 0b010010, 0b01000>; -defm VFWCVT_X_F_V : VALU_FV_VS2<"vfwcvt.x.f.v", 0b010010, 0b01001>; -defm VFWCVT_RTZ_XU_F_V : VALU_FV_VS2<"vfwcvt.rtz.xu.f.v", 0b010010, 0b01110>; -defm VFWCVT_RTZ_X_F_V : VALU_FV_VS2<"vfwcvt.rtz.x.f.v", 0b010010, 0b01111>; -defm VFWCVT_F_XU_V : VALU_FV_VS2<"vfwcvt.f.xu.v", 0b010010, 0b01010>; -defm VFWCVT_F_X_V : VALU_FV_VS2<"vfwcvt.f.x.v", 0b010010, 0b01011>; -defm VFWCVT_F_F_V : VALU_FV_VS2<"vfwcvt.f.f.v", 0b010010, 0b01100>; +defm VFWCVT_XU_F_V : VWCVTI_FV_VS2<"vfwcvt.xu.f.v", 0b010010, 0b01000>; +defm VFWCVT_X_F_V : VWCVTI_FV_VS2<"vfwcvt.x.f.v", 0b010010, 0b01001>; +defm VFWCVT_RTZ_XU_F_V : VWCVTI_FV_VS2<"vfwcvt.rtz.xu.f.v", 0b010010, 0b01110>; +defm VFWCVT_RTZ_X_F_V : VWCVTI_FV_VS2<"vfwcvt.rtz.x.f.v", 0b010010, 0b01111>; +defm VFWCVT_F_XU_V : VWCVTF_IV_VS2<"vfwcvt.f.xu.v", 0b010010, 0b01010>; +defm VFWCVT_F_X_V : VWCVTF_IV_VS2<"vfwcvt.f.x.v", 0b010010, 0b01011>; +defm VFWCVT_F_F_V : VWCVTF_FV_VS2<"vfwcvt.f.f.v", 0b010010, 0b01100>; } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt // Narrowing Floating-Point/Integer Type-Convert Instructions let Constraints = "@earlyclobber $vd" in { -defm VFNCVT_XU_F_W : VALU_FV_VS2<"vfncvt.xu.f.w", 0b010010, 0b10000>; -defm VFNCVT_X_F_W : VALU_FV_VS2<"vfncvt.x.f.w", 0b010010, 0b10001>; -defm VFNCVT_RTZ_XU_F_W : VALU_FV_VS2<"vfncvt.rtz.xu.f.w", 0b010010, 0b10110>; -defm VFNCVT_RTZ_X_F_W : VALU_FV_VS2<"vfncvt.rtz.x.f.w", 0b010010, 0b10111>; -defm VFNCVT_F_XU_W : VALU_FV_VS2<"vfncvt.f.xu.w", 0b010010, 0b10010>; -defm VFNCVT_F_X_W : VALU_FV_VS2<"vfncvt.f.x.w", 0b010010, 0b10011>; -defm VFNCVT_F_F_W : VALU_FV_VS2<"vfncvt.f.f.w", 0b010010, 0b10100>; -defm VFNCVT_ROD_F_F_W : VALU_FV_VS2<"vfncvt.rod.f.f.w", 0b010010, 0b10101>; +defm VFNCVT_XU_F_W : VNCVTI_FV_VS2<"vfncvt.xu.f.w", 0b010010, 0b10000>; +defm VFNCVT_X_F_W : VNCVTI_FV_VS2<"vfncvt.x.f.w", 0b010010, 0b10001>; +defm VFNCVT_RTZ_XU_F_W : VNCVTI_FV_VS2<"vfncvt.rtz.xu.f.w", 0b010010, 0b10110>; +defm VFNCVT_RTZ_X_F_W : VNCVTI_FV_VS2<"vfncvt.rtz.x.f.w", 0b010010, 0b10111>; +defm VFNCVT_F_XU_W : VNCVTF_IV_VS2<"vfncvt.f.xu.w", 0b010010, 0b10010>; +defm VFNCVT_F_X_W : VNCVTF_IV_VS2<"vfncvt.f.x.w", 0b010010, 0b10011>; +defm VFNCVT_F_F_W : VNCVTF_FV_VS2<"vfncvt.f.f.w", 0b010010, 0b10100>; +defm VFNCVT_ROD_F_F_W : VNCVTF_FV_VS2<"vfncvt.rod.f.f.w", 0b010010, 0b10101>; } // Constraints = "@earlyclobber $vd" } // Predicates = [HasStdExtV, HasStdExtF] let Predicates = [HasStdExtV] in { + // Vector Single-Width Integer Reduction Instructions let RVVConstraint = NoConstraint in { -defm VREDSUM : VALU_MV_V<"vredsum", 0b000000>; -defm VREDMAXU : VALU_MV_V<"vredmaxu", 0b000110>; -defm VREDMAX : VALU_MV_V<"vredmax", 0b000111>; -defm VREDMINU : VALU_MV_V<"vredminu", 0b000100>; -defm VREDMIN : VALU_MV_V<"vredmin", 0b000101>; -defm VREDAND : VALU_MV_V<"vredand", 0b000001>; 
-defm VREDOR : VALU_MV_V<"vredor", 0b000010>; -defm VREDXOR : VALU_MV_V<"vredxor", 0b000011>; +defm VREDSUM : VRED_MV_V<"vredsum", 0b000000>; +defm VREDMAXU : VRED_MV_V<"vredmaxu", 0b000110>; +defm VREDMAX : VRED_MV_V<"vredmax", 0b000111>; +defm VREDMINU : VRED_MV_V<"vredminu", 0b000100>; +defm VREDMIN : VRED_MV_V<"vredmin", 0b000101>; +defm VREDAND : VRED_MV_V<"vredand", 0b000001>; +defm VREDOR : VRED_MV_V<"vredor", 0b000010>; +defm VREDXOR : VRED_MV_V<"vredxor", 0b000011>; } // RVVConstraint = NoConstraint // Vector Widening Integer Reduction Instructions @@ -908,18 +1319,19 @@ let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in { // This has the downside that the earlyclobber constraint is too coarse and // will impose unnecessary restrictions by not allowing the destination to // overlap with the first (wide) operand. -defm VWREDSUMU : VALU_IV_V<"vwredsumu", 0b110000>; -defm VWREDSUM : VALU_IV_V<"vwredsum", 0b110001>; +defm VWREDSUMU : VWRED_IV_V<"vwredsumu", 0b110000>; +defm VWREDSUM : VWRED_IV_V<"vwredsum", 0b110001>; } // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint + } // Predicates = [HasStdExtV] let Predicates = [HasStdExtV, HasStdExtF] in { // Vector Single-Width Floating-Point Reduction Instructions let RVVConstraint = NoConstraint in { -defm VFREDOSUM : VALU_FV_V<"vfredosum", 0b000011>; -defm VFREDSUM : VALU_FV_V<"vfredsum", 0b000001>; -defm VFREDMAX : VALU_FV_V<"vfredmax", 0b000111>; -defm VFREDMIN : VALU_FV_V<"vfredmin", 0b000101>; +defm VFREDOSUM : VREDO_FV_V<"vfredosum", 0b000011>; +defm VFREDSUM : VRED_FV_V<"vfredsum", 0b000001>; +defm VFREDMAX : VRED_FV_V<"vfredmax", 0b000111>; +defm VFREDMIN : VRED_FV_V<"vfredmin", 0b000101>; } // RVVConstraint = NoConstraint // Vector Widening Floating-Point Reduction Instructions @@ -928,22 +1340,22 @@ let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in { // This has the downside that the earlyclobber constraint is too coarse and // will impose unnecessary restrictions by not allowing the destination to // overlap with the first (wide) operand. 
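// Concretely, as an illustrative reading of the note above: @earlyclobber
// marks the whole $vd group as potentially written before the sources are
// consumed, so the register allocator must keep $vd disjoint from every
// input operand. An assignment in which $vd aliases only the wide vs2
// group would be safe for these reductions, yet the blanket constraint
// refuses it as well; only overlap with the scalar vs1 operand or the v0
// mask is a genuine hazard here.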
-defm VFWREDOSUM : VALU_FV_V<"vfwredosum", 0b110011>; -defm VFWREDSUM : VALU_FV_V<"vfwredsum", 0b110001>; +defm VFWREDOSUM : VWREDO_FV_V<"vfwredosum", 0b110011>; +defm VFWREDSUM : VWRED_FV_V<"vfwredsum", 0b110001>; } // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint } // Predicates = [HasStdExtV, HasStdExtF] let Predicates = [HasStdExtV] in { // Vector Mask-Register Logical Instructions let RVVConstraint = NoConstraint in { -defm VMAND_M : VALU_MV_Mask<"vmand", 0b011001, "m">; -defm VMNAND_M : VALU_MV_Mask<"vmnand", 0b011101, "m">; -defm VMANDNOT_M : VALU_MV_Mask<"vmandnot", 0b011000, "m">; -defm VMXOR_M : VALU_MV_Mask<"vmxor", 0b011011, "m">; -defm VMOR_M : VALU_MV_Mask<"vmor", 0b011010, "m">; -defm VMNOR_M : VALU_MV_Mask<"vmnor", 0b011110, "m">; -defm VMORNOT_M : VALU_MV_Mask<"vmornot", 0b011100, "m">; -defm VMXNOR_M : VALU_MV_Mask<"vmxnor", 0b011111, "m">; +defm VMAND_M : VMALU_MV_Mask<"vmand", 0b011001, "m">; +defm VMNAND_M : VMALU_MV_Mask<"vmnand", 0b011101, "m">; +defm VMANDNOT_M : VMALU_MV_Mask<"vmandnot", 0b011000, "m">; +defm VMXOR_M : VMALU_MV_Mask<"vmxor", 0b011011, "m">; +defm VMOR_M : VMALU_MV_Mask<"vmor", 0b011010, "m">; +defm VMNOR_M : VMALU_MV_Mask<"vmnor", 0b011110, "m">; +defm VMORNOT_M : VMALU_MV_Mask<"vmornot", 0b011100, "m">; +defm VMXNOR_M : VMALU_MV_Mask<"vmxnor", 0b011111, "m">; } def : InstAlias<"vmmv.m $vd, $vs", @@ -957,98 +1369,113 @@ def : InstAlias<"vmnot.m $vd, $vs", let hasSideEffects = 0, mayLoad = 0, mayStore = 0, RVVConstraint = NoConstraint in { + // Vector mask population count vpopc def VPOPC_M : RVInstV<0b010000, 0b10000, OPMVV, (outs GPR:$vd), - (ins VR:$vs2, VMaskOp:$vm), - "vpopc.m", "$vd, $vs2$vm">; + (ins VR:$vs2, VMaskOp:$vm), + "vpopc.m", "$vd, $vs2$vm">, + Sched<[WriteVMPopV, ReadVMPopV, ReadVMask]>; // vfirst find-first-set mask bit def VFIRST_M : RVInstV<0b010000, 0b10001, OPMVV, (outs GPR:$vd), - (ins VR:$vs2, VMaskOp:$vm), - "vfirst.m", "$vd, $vs2$vm">; + (ins VR:$vs2, VMaskOp:$vm), + "vfirst.m", "$vd, $vs2$vm">, + Sched<[WriteVMFFSV, ReadVMFFSV, ReadVMask]>; + } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 let Constraints = "@earlyclobber $vd", RVVConstraint = Iota in { + // vmsbf.m set-before-first mask bit -defm VMSBF_M : VALU_MV_VS2<"vmsbf.m", 0b010100, 0b00001>; +defm VMSBF_M : VMSFS_MV_V<"vmsbf.m", 0b010100, 0b00001>; // vmsif.m set-including-first mask bit -defm VMSIF_M : VALU_MV_VS2<"vmsif.m", 0b010100, 0b00011>; +defm VMSIF_M : VMSFS_MV_V<"vmsif.m", 0b010100, 0b00011>; // vmsof.m set-only-first mask bit -defm VMSOF_M : VALU_MV_VS2<"vmsof.m", 0b010100, 0b00010>; +defm VMSOF_M : VMSFS_MV_V<"vmsof.m", 0b010100, 0b00010>; // Vector Iota Instruction -defm VIOTA_M : VALU_MV_VS2<"viota.m", 0b010100, 0b10000>; +defm VIOTA_M : VMIOT_MV_V<"viota.m", 0b010100, 0b10000>; + } // Constraints = "@earlyclobber $vd", RVVConstraint = Iota // Vector Element Index Instruction let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { + +let vs2 = 0 in def VID_V : RVInstV<0b010100, 0b10001, OPMVV, (outs VR:$vd), - (ins VMaskOp:$vm), "vid.v", "$vd$vm"> { - let vs2 = 0; -} + (ins VMaskOp:$vm), "vid.v", "$vd$vm">, + Sched<[WriteVMIdxV, ReadVMask]>; // Integer Scalar Move Instructions let vm = 1, RVVConstraint = NoConstraint in { def VMV_X_S : RVInstV<0b010000, 0b00000, OPMVV, (outs GPR:$vd), - (ins VR:$vs2), "vmv.x.s", "$vd, $vs2">; + (ins VR:$vs2), "vmv.x.s", "$vd, $vs2">, + Sched<[WriteVIMovVX, ReadVIMovVX]>; let Constraints = "$vd = $vd_wb" in def VMV_S_X : RVInstV2<0b010000, 0b00000, OPMVX, (outs VR:$vd_wb), - (ins VR:$vd, GPR:$rs1), 
"vmv.s.x", "$vd, $rs1">; - + (ins VR:$vd, GPR:$rs1), "vmv.s.x", "$vd, $rs1">, + Sched<[WriteVIMovXV, ReadVIMovXV, ReadVIMovXX]>; } + } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 + } // Predicates = [HasStdExtV] let Predicates = [HasStdExtV, HasStdExtF] in { + let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1, RVVConstraint = NoConstraint in { // Floating-Point Scalar Move Instructions def VFMV_F_S : RVInstV<0b010000, 0b00000, OPFVV, (outs FPR32:$vd), - (ins VR:$vs2), "vfmv.f.s", "$vd, $vs2">; + (ins VR:$vs2), "vfmv.f.s", "$vd, $vs2">, + Sched<[WriteVFMovVF, ReadVFMovVF]>; let Constraints = "$vd = $vd_wb" in def VFMV_S_F : RVInstV2<0b010000, 0b00000, OPFVF, (outs VR:$vd_wb), - (ins VR:$vd, FPR32:$rs1), "vfmv.s.f", "$vd, $rs1">; + (ins VR:$vd, FPR32:$rs1), "vfmv.s.f", "$vd, $rs1">, + Sched<[WriteVFMovFV, ReadVFMovFV, ReadVFMovFX]>; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1 + } // Predicates = [HasStdExtV, HasStdExtF] let Predicates = [HasStdExtV] in { // Vector Slide Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in { -defm VSLIDEUP_V : VALU_IV_X_I<"vslideup", 0b001110, uimm5>; -defm VSLIDE1UP_V : VALU_MV_X<"vslide1up", 0b001110>; +defm VSLIDEUP_V : VSLD_IV_X_I<"vslideup", 0b001110, uimm5>; +defm VSLIDE1UP_V : VSLD1_MV_X<"vslide1up", 0b001110>; } // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp -defm VSLIDEDOWN_V : VALU_IV_X_I<"vslidedown", 0b001111, uimm5>; -defm VSLIDE1DOWN_V : VALU_MV_X<"vslide1down", 0b001111>; +defm VSLIDEDOWN_V : VSLD_IV_X_I<"vslidedown", 0b001111, uimm5>; +defm VSLIDE1DOWN_V : VSLD1_MV_X<"vslide1down", 0b001111>; } // Predicates = [HasStdExtV] let Predicates = [HasStdExtV, HasStdExtF] in { let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in { -defm VFSLIDE1UP_V : VALU_FV_F<"vfslide1up", 0b001110>; +defm VFSLIDE1UP_V : VSLD1_FV_F<"vfslide1up", 0b001110>; } // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp -defm VFSLIDE1DOWN_V : VALU_FV_F<"vfslide1down", 0b001111>; +defm VFSLIDE1DOWN_V : VSLD1_FV_F<"vfslide1down", 0b001111>; } // Predicates = [HasStdExtV, HasStdExtF] let Predicates = [HasStdExtV] in { // Vector Register Gather Instruction let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather in { -defm VRGATHER_V : VALU_IV_V_X_I<"vrgather", 0b001100, uimm5>; -def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">; +defm VRGATHER_V : VGTR_IV_V_X_I<"vrgather", 0b001100, uimm5>; +def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">, + Sched<[WriteVGatherV, ReadVGatherV, ReadVGatherV]>; } // Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather // Vector Compress Instruction let Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress in { -defm VCOMPRESS_V : VALU_MV_Mask<"vcompress", 0b010111>; +defm VCOMPRESS_V : VCPR_MV_Mask<"vcompress", 0b010111>; } // Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress let hasSideEffects = 0, mayLoad = 0, mayStore = 0, RVVConstraint = NoConstraint in { -foreach nf = [1, 2, 4, 8] in { - def VMV#nf#R_V : RVInstV<0b100111, !add(nf, -1), OPIVI, (outs VR:$vd), - (ins VR:$vs2), "vmv" # nf # "r.v", - "$vd, $vs2"> { - let Uses = []; - let vm = 1; - } +foreach n = [1, 2, 4, 8] in { + def VMV#n#R_V : RVInstV<0b100111, !add(n, -1), OPIVI, (outs VR:$vd), + (ins VR:$vs2), "vmv" # n # "r.v", "$vd, $vs2">, + VMVRSched { + let Uses = []; + let vm = 1; +} } } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 } // Predicates = [HasStdExtV] diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td 
b/llvm/lib/Target/RISCV/RISCVSchedRocket.td index ed26a5026114..14f59152ed42 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td +++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td @@ -231,6 +231,9 @@ def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; +//===----------------------------------------------------------------------===// +// Unsupported extensions +defm : UnsupportedSchedV; defm : UnsupportedSchedZba; defm : UnsupportedSchedZbb; defm : UnsupportedSchedZfh; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 314af180aca1..75ca6ca861be 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -219,6 +219,9 @@ def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; +//===----------------------------------------------------------------------===// +// Unsupported extensions +defm : UnsupportedSchedV; defm : UnsupportedSchedZba; defm : UnsupportedSchedZbb; defm : UnsupportedSchedZfh; diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td index f31e4af46c1b..4971ca1d4e3e 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedule.td +++ b/llvm/lib/Target/RISCV/RISCVSchedule.td @@ -230,3 +230,4 @@ def : ReadAdvance; // Include the scheduler resources for other instruction extensions. include "RISCVScheduleB.td" +include "RISCVScheduleV.td" diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td new file mode 100644 index 000000000000..43af1802d706 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td @@ -0,0 +1,820 @@ +//===-- RISCVScheduleV.td - RISCV Scheduling Definitions V -*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +/// Define scheduler resources associated with def operands. + +// 7. Vector Loads and Stores +// 7.4. Vector Unit-Stride Instructions +def WriteVLDE8 : SchedWrite; +def WriteVLDE16 : SchedWrite; +def WriteVLDE32 : SchedWrite; +def WriteVLDE64 : SchedWrite; +def WriteVSTE8 : SchedWrite; +def WriteVSTE16 : SchedWrite; +def WriteVSTE32 : SchedWrite; +def WriteVSTE64 : SchedWrite; +// 7.4.1. Vector Unit-Strided Mask +def WriteVLDM : SchedWrite; +def WriteVSTM : SchedWrite; +// 7.5. Vector Strided Instructions +def WriteVLDS8 : SchedWrite; +def WriteVLDS16 : SchedWrite; +def WriteVLDS32 : SchedWrite; +def WriteVLDS64 : SchedWrite; +def WriteVSTS8 : SchedWrite; +def WriteVSTS16 : SchedWrite; +def WriteVSTS32 : SchedWrite; +def WriteVSTS64 : SchedWrite; +// 7.6. Vector Indexed Instructions +def WriteVLDUX8 : SchedWrite; +def WriteVLDUX16 : SchedWrite; +def WriteVLDUX32 : SchedWrite; +def WriteVLDUX64 : SchedWrite; +def WriteVLDOX8 : SchedWrite; +def WriteVLDOX16 : SchedWrite; +def WriteVLDOX32 : SchedWrite; +def WriteVLDOX64 : SchedWrite; +def WriteVSTUX8 : SchedWrite; +def WriteVSTUX16 : SchedWrite; +def WriteVSTUX32 : SchedWrite; +def WriteVSTUX64 : SchedWrite; +def WriteVSTOX8 : SchedWrite; +def WriteVSTOX16 : SchedWrite; +def WriteVSTOX32 : SchedWrite; +def WriteVSTOX64 : SchedWrite; +// 7.7. 
Vector Unit-stride Fault-Only-First Loads +def WriteVLDFF8 : SchedWrite; +def WriteVLDFF16 : SchedWrite; +def WriteVLDFF32 : SchedWrite; +def WriteVLDFF64 : SchedWrite; +// 7.9. Vector Whole Register Instructions +def WriteVLD1R8 : SchedWrite; +def WriteVLD1R16 : SchedWrite; +def WriteVLD1R32 : SchedWrite; +def WriteVLD1R64 : SchedWrite; +def WriteVLD2R8 : SchedWrite; +def WriteVLD2R16 : SchedWrite; +def WriteVLD2R32 : SchedWrite; +def WriteVLD2R64 : SchedWrite; +def WriteVLD4R8 : SchedWrite; +def WriteVLD4R16 : SchedWrite; +def WriteVLD4R32 : SchedWrite; +def WriteVLD4R64 : SchedWrite; +def WriteVLD8R8 : SchedWrite; +def WriteVLD8R16 : SchedWrite; +def WriteVLD8R32 : SchedWrite; +def WriteVLD8R64 : SchedWrite; +def WriteVST1R : SchedWrite; +def WriteVST2R : SchedWrite; +def WriteVST4R : SchedWrite; +def WriteVST8R : SchedWrite; + +// 11. Vector Integer Arithmetic Instructions +// 11.1. Vector Single-Width Integer Add and Subtract +// 11.5. Vector Bitwise Logical Instructions +def WriteVIALUV : SchedWrite; +def WriteVIALUX : SchedWrite; +def WriteVIALUI : SchedWrite; +// 11.2. Vector Widening Integer Add/Subtract +def WriteVIWALUV : SchedWrite; +def WriteVIWALUX : SchedWrite; +def WriteVIWALUI : SchedWrite; +// 11.3. Vector Integer Extension +def WriteVExtV : SchedWrite; +// 11.4. Vector Integer Arithmetic with Carry or Borrow Instructions +def WriteVICALUV : SchedWrite; +def WriteVICALUX : SchedWrite; +def WriteVICALUI : SchedWrite; +// 11.6. Vector Single-Width Bit Shift Instructions +def WriteVShiftV : SchedWrite; +def WriteVShiftX : SchedWrite; +def WriteVShiftI : SchedWrite; +// 11.7. Vector Narrowing Integer Right Shift Instructions +def WriteVNShiftV : SchedWrite; +def WriteVNShiftX : SchedWrite; +def WriteVNShiftI : SchedWrite; +// 11.8. Vector Integer Comparison Instructions +// 11.9. Vector Integer Min/Max Instructions +def WriteVICmpV : SchedWrite; +def WriteVICmpX : SchedWrite; +def WriteVICmpI : SchedWrite; +// 11.10. Vector Single-Width Integer Multiply Instructions +def WriteVIMulV : SchedWrite; +def WriteVIMulX : SchedWrite; +// 11.11. Vector Integer Divide Instructions +def WriteVIDivV : SchedWrite; +def WriteVIDivX : SchedWrite; +// 11.12. Vector Widening Integer Multiply Instructions +def WriteVIWMulV : SchedWrite; +def WriteVIWMulX : SchedWrite; +// 11.13. Vector Single-Width Integer Multiply-Add Instructions +def WriteVIMulAddV : SchedWrite; +def WriteVIMulAddX : SchedWrite; +// 11.14. Vector Widening Integer Multiply-Add Instructions +def WriteVIWMulAddV : SchedWrite; +def WriteVIWMulAddX : SchedWrite; +// 11.15. Vector Integer Merge Instructions +def WriteVIMergeV : SchedWrite; +def WriteVIMergeX : SchedWrite; +def WriteVIMergeI : SchedWrite; +// 11.16. Vector Integer Move Instructions +def WriteVIMovV : SchedWrite; +def WriteVIMovX : SchedWrite; +def WriteVIMovI : SchedWrite; + +// 12. Vector Fixed-Point Arithmetic Instructions +// 12.1. Vector Single-Width Saturating Add and Subtract +def WriteVSALUV : SchedWrite; +def WriteVSALUX : SchedWrite; +def WriteVSALUI : SchedWrite; +// 12.2. Vector Single-Width Averaging Add and Subtract +def WriteVAALUV : SchedWrite; +def WriteVAALUX : SchedWrite; +// 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation +def WriteVSMulV : SchedWrite; +def WriteVSMulX : SchedWrite; +// 12.4. Vector Single-Width Scaling Shift Instructions +def WriteVSShiftV : SchedWrite; +def WriteVSShiftX : SchedWrite; +def WriteVSShiftI : SchedWrite; +// 12.5. 
Vector Narrowing Fixed-Point Clip Instructions +def WriteVNClipV : SchedWrite; +def WriteVNClipX : SchedWrite; +def WriteVNClipI : SchedWrite; + +// 13. Vector Floating-Point Instructions +// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions +def WriteVFALUV : SchedWrite; +def WriteVFALUF : SchedWrite; +// 13.3. Vector Widening Floating-Point Add/Subtract Instructions +def WriteVFWALUV : SchedWrite; +def WriteVFWALUF : SchedWrite; +// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions +def WriteVFMulV : SchedWrite; +def WriteVFMulF : SchedWrite; +def WriteVFDivV : SchedWrite; +def WriteVFDivF : SchedWrite; +// 13.5. Vector Widening Floating-Point Multiply +def WriteVFWMulV : SchedWrite; +def WriteVFWMulF : SchedWrite; +// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions +def WriteVFMulAddV : SchedWrite; +def WriteVFMulAddF : SchedWrite; +// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions +def WriteVFWMulAddV : SchedWrite; +def WriteVFWMulAddF : SchedWrite; +// 13.8. Vector Floating-Point Square-Root Instruction +def WriteVFSqrtV : SchedWrite; +// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction +// 13.10. Vector Floating-Point Reciprocal Estimate Instruction +def WriteVFRecpV : SchedWrite; +// 13.11. Vector Floating-Point MIN/MAX Instructions +// 13.13. Vector Floating-Point Compare Instructions +def WriteVFCmpV : SchedWrite; +def WriteVFCmpF : SchedWrite; +// 13.12. Vector Floating-Point Sign-Injection Instructions +def WriteVFSgnjV : SchedWrite; +def WriteVFSgnjF : SchedWrite; +// 13.14. Vector Floating-Point Classify Instruction +def WriteVFClassV : SchedWrite; +// 13.15. Vector Floating-Point Merge Instruction +def WriteVFMergeV : SchedWrite; +// 13.16. Vector Floating-Point Move Instruction +def WriteVFMovV : SchedWrite; +// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions +def WriteVFCvtIToFV : SchedWrite; +def WriteVFCvtFToIV : SchedWrite; +def WriteVFCvtFToFV : SchedWrite; +// 13.18. Widening Floating-Point/Integer Type-Convert Instructions +def WriteVFWCvtIToFV : SchedWrite; +def WriteVFWCvtFToIV : SchedWrite; +def WriteVFWCvtFToFV : SchedWrite; +// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions +def WriteVFNCvtIToFV : SchedWrite; +def WriteVFNCvtFToIV : SchedWrite; +def WriteVFNCvtFToFV : SchedWrite; + +// 14. Vector Reduction Operations +// 14.1. Vector Single-Width Integer Reduction Instructions +def WriteVIRedV : SchedWrite; +// 14.2. Vector Widening Integer Reduction Instructions +def WriteVIWRedV : SchedWrite; +// 14.3. Vector Single-Width Floating-Point Reduction Instructions +def WriteVFRedV : SchedWrite; +def WriteVFRedOV : SchedWrite; +// 14.4. Vector Widening Floating-Point Reduction Instructions +def WriteVFWRedV : SchedWrite; +def WriteVFWRedOV : SchedWrite; + +// 15. Vector Mask Instructions +// 15.1. Vector Mask-Register Logical Instructions +def WriteVMALUV : SchedWrite; +// 15.2. Vector Mask Population Count +def WriteVMPopV : SchedWrite; +// 15.3. Vector Find-First-Set Mask Bit +def WriteVMFFSV : SchedWrite; +// 15.4. Vector Set-Before-First Mask Bit +// 15.5. Vector Set-Including-First Mask Bit +// 15.6. Vector Set-only-First Mask Bit +def WriteVMSFSV : SchedWrite; +// 15.8. Vector Iota Instruction +def WriteVMIotV : SchedWrite; +// 15.9. Vector Element Index Instruction +def WriteVMIdxV : SchedWrite; + +// 16. Vector Permutation Instructions +// 16.1. 
Integer Scalar Move Instructions +def WriteVIMovVX : SchedWrite; +def WriteVIMovXV : SchedWrite; +// 16.2. Floating-Point Scalar Move Instructions +def WriteVFMovVF : SchedWrite; +def WriteVFMovFV : SchedWrite; +// 16.3. Vector Slide Instructions +def WriteVISlideX : SchedWrite; +def WriteVISlideI : SchedWrite; +def WriteVISlide1X : SchedWrite; +def WriteVFSlide1F : SchedWrite; +// 16.4. Vector Register Gather Instructions +def WriteVGatherV : SchedWrite; +def WriteVGatherX : SchedWrite; +def WriteVGatherI : SchedWrite; +// 16.5. Vector Compress Instruction +def WriteVCompressV : SchedWrite; +// 16.6. Whole Vector Register Move +def WriteVMov1V : SchedWrite; +def WriteVMov2V : SchedWrite; +def WriteVMov4V : SchedWrite; +def WriteVMov8V : SchedWrite; + +//===----------------------------------------------------------------------===// +/// Define scheduler resources associated with use operands. + +// 7. Vector Loads and Stores +def ReadVLDX : SchedRead; +def ReadVSTX : SchedRead; +// 7.4. Vector Unit-Stride Instructions +def ReadVSTE8V : SchedRead; +def ReadVSTE16V : SchedRead; +def ReadVSTE32V : SchedRead; +def ReadVSTE64V : SchedRead; +// 7.4.1. Vector Unit-Strided Mask +def ReadVSTM : SchedRead; +// 7.5. Vector Strided Instructions +def ReadVLDSX : SchedRead; +def ReadVSTSX : SchedRead; +def ReadVSTS8V : SchedRead; +def ReadVSTS16V : SchedRead; +def ReadVSTS32V : SchedRead; +def ReadVSTS64V : SchedRead; +// 7.6. Vector Indexed Instructions +def ReadVLDUXV : SchedRead; +def ReadVLDOXV : SchedRead; +def ReadVSTUX8 : SchedRead; +def ReadVSTUX16 : SchedRead; +def ReadVSTUX32 : SchedRead; +def ReadVSTUX64 : SchedRead; +def ReadVSTUXV : SchedRead; +def ReadVSTUX8V : SchedRead; +def ReadVSTUX16V : SchedRead; +def ReadVSTUX32V : SchedRead; +def ReadVSTUX64V : SchedRead; +def ReadVSTOX8 : SchedRead; +def ReadVSTOX16 : SchedRead; +def ReadVSTOX32 : SchedRead; +def ReadVSTOX64 : SchedRead; +def ReadVSTOXV : SchedRead; +def ReadVSTOX8V : SchedRead; +def ReadVSTOX16V : SchedRead; +def ReadVSTOX32V : SchedRead; +def ReadVSTOX64V : SchedRead; +// 7.9. Vector Whole Register Instructions +def ReadVST1R : SchedRead; +def ReadVST2R : SchedRead; +def ReadVST4R : SchedRead; +def ReadVST8R : SchedRead; + +// 11. Vector Integer Arithmetic Instructions +// 11.1. Vector Single-Width Integer Add and Subtract +// 11.5. Vector Bitwise Logical Instructions +def ReadVIALUV : SchedRead; +def ReadVIALUX : SchedRead; +// 11.2. Vector Widening Integer Add/Subtract +def ReadVIWALUV : SchedRead; +def ReadVIWALUX : SchedRead; +// 11.3. Vector Integer Extension +def ReadVExtV : SchedRead; +// 11.4. Vector Integer Arithmetic with Carry or Borrow Instructions +def ReadVIALUCV : SchedRead; +def ReadVIALUCX : SchedRead; +// 11.6. Vector Single-Width Bit Shift Instructions +def ReadVShiftV : SchedRead; +def ReadVShiftX : SchedRead; +// 11.7. Vector Narrowing Integer Right Shift Instructions +def ReadVNShiftV : SchedRead; +def ReadVNShiftX : SchedRead; +// 11.8. Vector Integer Comparison Instructions +// 11.9. Vector Integer Min/Max Instructions +def ReadVICmpV : SchedRead; +def ReadVICmpX : SchedRead; +// 11.10. Vector Single-Width Integer Multiply Instructions +def ReadVIMulV : SchedRead; +def ReadVIMulX : SchedRead; +// 11.11. Vector Integer Divide Instructions +def ReadVIDivV : SchedRead; +def ReadVIDivX : SchedRead; +// 11.12. Vector Widening Integer Multiply Instructions +def ReadVIWMulV : SchedRead; +def ReadVIWMulX : SchedRead; +// 11.13. 
Vector Single-Width Integer Multiply-Add Instructions
+def ReadVIMulAddV : SchedRead;
+def ReadVIMulAddX : SchedRead;
+// 11.14. Vector Widening Integer Multiply-Add Instructions
+def ReadVIWMulAddV : SchedRead;
+def ReadVIWMulAddX : SchedRead;
+// 11.15. Vector Integer Merge Instructions
+def ReadVIMergeV : SchedRead;
+def ReadVIMergeX : SchedRead;
+// 11.16. Vector Integer Move Instructions
+def ReadVIMovV : SchedRead;
+def ReadVIMovX : SchedRead;
+
+// 12. Vector Fixed-Point Arithmetic Instructions
+// 12.1. Vector Single-Width Saturating Add and Subtract
+def ReadVSALUV : SchedRead;
+def ReadVSALUX : SchedRead;
+// 12.2. Vector Single-Width Averaging Add and Subtract
+def ReadVAALUV : SchedRead;
+def ReadVAALUX : SchedRead;
+// 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
+def ReadVSMulV : SchedRead;
+def ReadVSMulX : SchedRead;
+// 12.4. Vector Single-Width Scaling Shift Instructions
+def ReadVSShiftV : SchedRead;
+def ReadVSShiftX : SchedRead;
+// 12.5. Vector Narrowing Fixed-Point Clip Instructions
+def ReadVNClipV : SchedRead;
+def ReadVNClipX : SchedRead;
+
+// 13. Vector Floating-Point Instructions
+// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
+def ReadVFALUV : SchedRead;
+def ReadVFALUF : SchedRead;
+// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
+def ReadVFWALUV : SchedRead;
+def ReadVFWALUF : SchedRead;
+// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
+def ReadVFMulV : SchedRead;
+def ReadVFMulF : SchedRead;
+def ReadVFDivV : SchedRead;
+def ReadVFDivF : SchedRead;
+// 13.5. Vector Widening Floating-Point Multiply
+def ReadVFWMulV : SchedRead;
+def ReadVFWMulF : SchedRead;
+// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+def ReadVFMulAddV : SchedRead;
+def ReadVFMulAddF : SchedRead;
+// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
+def ReadVFWMulAddV : SchedRead;
+def ReadVFWMulAddF : SchedRead;
+// 13.8. Vector Floating-Point Square-Root Instruction
+def ReadVFSqrtV : SchedRead;
+// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
+// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
+def ReadVFRecpV : SchedRead;
+// 13.11. Vector Floating-Point MIN/MAX Instructions
+// 13.13. Vector Floating-Point Compare Instructions
+def ReadVFCmpV : SchedRead;
+def ReadVFCmpF : SchedRead;
+// 13.12. Vector Floating-Point Sign-Injection Instructions
+def ReadVFSgnjV : SchedRead;
+def ReadVFSgnjF : SchedRead;
+// 13.14. Vector Floating-Point Classify Instruction
+def ReadVFClassV : SchedRead;
+// 13.15. Vector Floating-Point Merge Instruction
+def ReadVFMergeV : SchedRead;
+def ReadVFMergeF : SchedRead;
+// 13.16. Vector Floating-Point Move Instruction
+def ReadVFMovF : SchedRead;
+// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions
+def ReadVFCvtIToFV : SchedRead;
+def ReadVFCvtFToIV : SchedRead;
+// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
+def ReadVFWCvtIToFV : SchedRead;
+def ReadVFWCvtFToIV : SchedRead;
+def ReadVFWCvtFToFV : SchedRead;
+// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
+def ReadVFNCvtIToFV : SchedRead;
+def ReadVFNCvtFToIV : SchedRead;
+def ReadVFNCvtFToFV : SchedRead;
+
+// 14. Vector Reduction Operations
+// 14.1. Vector Single-Width Integer Reduction Instructions
+def ReadVIRedV : SchedRead;
+def ReadVIRedV0 : SchedRead;
+// 14.2. Vector Widening Integer Reduction Instructions
+def ReadVIWRedV : SchedRead;
+def ReadVIWRedV0 : SchedRead;
+// 14.3. Vector Single-Width Floating-Point Reduction Instructions
+def ReadVFRedV : SchedRead;
+def ReadVFRedV0 : SchedRead;
+def ReadVFRedOV : SchedRead;
+def ReadVFRedOV0 : SchedRead;
+// 14.4. Vector Widening Floating-Point Reduction Instructions
+def ReadVFWRedV : SchedRead;
+def ReadVFWRedV0 : SchedRead;
+def ReadVFWRedOV : SchedRead;
+def ReadVFWRedOV0 : SchedRead;
+
+// 15. Vector Mask Instructions
+// 15.1. Vector Mask-Register Logical Instructions
+def ReadVMALUV : SchedRead;
+// 15.2. Vector Mask Population Count
+def ReadVMPopV : SchedRead;
+// 15.3. Vector Find-First-Set Mask Bit
+def ReadVMFFSV : SchedRead;
+// 15.4. Vector Set-Before-First Mask Bit
+// 15.5. Vector Set-Including-First Mask Bit
+// 15.6. Vector Set-only-First Mask Bit
+def ReadVMSFSV : SchedRead;
+// 15.8. Vector Iota Instruction
+def ReadVMIotV : SchedRead;
+
+// 16. Vector Permutation Instructions
+// 16.1. Integer Scalar Move Instructions
+def ReadVIMovVX : SchedRead;
+def ReadVIMovXV : SchedRead;
+def ReadVIMovXX : SchedRead;
+// 16.2. Floating-Point Scalar Move Instructions
+def ReadVFMovVF : SchedRead;
+def ReadVFMovFV : SchedRead;
+def ReadVFMovFX : SchedRead;
+// 16.3. Vector Slide Instructions
+def ReadVISlideV : SchedRead;
+def ReadVISlideX : SchedRead;
+def ReadVFSlideV : SchedRead;
+def ReadVFSlideF : SchedRead;
+// 16.4. Vector Register Gather Instructions
+def ReadVGatherV : SchedRead;
+def ReadVGatherX : SchedRead;
+// 16.5. Vector Compress Instruction
+def ReadVCompressV : SchedRead;
+// 16.6. Whole Vector Register Move
+def ReadVMov1V : SchedRead;
+def ReadVMov2V : SchedRead;
+def ReadVMov4V : SchedRead;
+def ReadVMov8V : SchedRead;
+
+// Others
+def ReadVMask : SchedRead;
+
+//===----------------------------------------------------------------------===//
+/// Define default scheduler resources for V.
+
+multiclass UnsupportedSchedV {
+let Unsupported = true in {
+
+// 7. Vector Loads and Stores
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+
+// 12. Vector Integer Arithmetic Instructions
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+
+// 13. Vector Fixed-Point Arithmetic Instructions
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+
+// 14. Vector Floating-Point Instructions
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+
+// 15. Vector Reduction Operations
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+
+// 16. Vector Mask Instructions
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+
+// 17. Vector Permutation Instructions
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+def : WriteRes;
+
+// 7. Vector Loads and Stores
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+
+// 12. Vector Integer Arithmetic Instructions
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+
+// 13. Vector Fixed-Point Arithmetic Instructions
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+
+// 14. Vector Floating-Point Instructions
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+
+// 15. Vector Reduction Operations
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+
+// 16. Vector Mask Instructions
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+
+// 17. Vector Permutation Instructions
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+def : ReadAdvance;
+
+// Others
+def : ReadAdvance;
+
+} // Unsupported
+} // UnsupportedSchedV
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3a64b3460030..a69850896436 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6704,17 +6704,21 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
 
   if (Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
     auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
     SDValue Ptr = MemIntr->getBasePtr();
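The hunk below fixes the element bookkeeping for subvector broadcasts: the constant-pool entry may be wider than the broadcast subvector, so element counts must be derived from the subvector width rather than the constant width. A minimal sketch of that arithmetic, with assumed example widths (the names mirror the patch, the numbers are invented):

    #include <cassert>
    void subvectorBroadcastSizesSketch() {
      unsigned SizeInBits = 512;       // width of the value being analyzed
      unsigned CstSizeInBits = 256;    // width of the constant-pool entry
      unsigned SubVecSizeInBits = 128; // width actually loaded and broadcast
      unsigned CstEltSizeInBits = 32;  // scalar element width
      // The new guards: both widths must be whole multiples of the subvector.
      assert(CstSizeInBits % SubVecSizeInBits == 0 &&
             SizeInBits % SubVecSizeInBits == 0);
      unsigned NumSubElts = SubVecSizeInBits / CstEltSizeInBits; // 4 elements
      unsigned NumSubVecs = SizeInBits / SubVecSizeInBits;       // 4 repeats
      (void)NumSubElts;
      (void)NumSubVecs;
    }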
+    // The source constant may be larger than the subvector broadcast,
+    // ensure we extract the correct subvector constants.
     if (const Constant *Cst = getTargetConstantFromBasePtr(Ptr)) {
       Type *CstTy = Cst->getType();
       unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
-      if (!CstTy->isVectorTy() || (SizeInBits % CstSizeInBits) != 0)
+      unsigned SubVecSizeInBits = MemIntr->getMemoryVT().getStoreSizeInBits();
+      if (!CstTy->isVectorTy() || (CstSizeInBits % SubVecSizeInBits) != 0 ||
+          (SizeInBits % SubVecSizeInBits) != 0)
         return false;
-      unsigned SubEltSizeInBits = CstTy->getScalarSizeInBits();
-      unsigned NumSubElts = CstSizeInBits / SubEltSizeInBits;
-      unsigned NumSubVecs = SizeInBits / CstSizeInBits;
+      unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
+      unsigned NumSubElts = SubVecSizeInBits / CstEltSizeInBits;
+      unsigned NumSubVecs = SizeInBits / SubVecSizeInBits;
       APInt UndefSubElts(NumSubElts, 0);
       SmallVector SubEltBits(NumSubElts * NumSubVecs,
-                             APInt(SubEltSizeInBits, 0));
+                             APInt(CstEltSizeInBits, 0));
       for (unsigned i = 0; i != NumSubElts; ++i) {
         if (!CollectConstantBits(Cst->getAggregateElement(i), SubEltBits[i],
                                  UndefSubElts, i))
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index e83e1e74ff52..ba00e7da81f9 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -708,6 +708,19 @@ class BinOpRM opcode, string mnemonic, X86TypeInfo typeinfo,
                mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
     Sched<[sched.Folded, sched.ReadAfterFold]>;
 
+// BinOpRM - Instructions like "adc reg, reg, [mem]".
+// There is an implicit register read at the end of the operand sequence.
+class BinOpRM_ImplicitUse opcode, string mnemonic, X86TypeInfo typeinfo,
+                          dag outlist, X86FoldableSchedWrite sched,
+                          list pattern>
+  : ITy,
+    Sched<[sched.Folded, sched.ReadAfterFold,
+           // base, scale, index, offset, segment.
+           ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+           // implicit register read.
+           sched.ReadAfterFold]>;
+
 // BinOpRM_F - Instructions like "cmp reg, [mem]".
 class BinOpRM_F opcode, string mnemonic, X86TypeInfo typeinfo,
                 SDNode opnode>
@@ -725,7 +738,7 @@ class BinOpRM_RF opcode, string mnemonic, X86TypeInfo typeinfo,
 // BinOpRM_RFF - Instructions like "adc reg, reg, [mem]".
 class BinOpRM_RFF opcode, string mnemonic, X86TypeInfo typeinfo,
                   SDNode opnode>
-  : BinOpRM;
@@ -805,7 +818,11 @@ class BinOpMR_RMW opcode, string mnemonic, X86TypeInfo typeinfo,
                   SDNode opnode>
   : BinOpMR,
-            Sched<[WriteALURMW]>;
+            (implicit EFLAGS)]>, Sched<[WriteALURMW,
+                                        // base, scale, index, offset, segment
+                                        ReadDefault, ReadDefault, ReadDefault,
+                                        ReadDefault, ReadDefault,
+                                        WriteALU.ReadAfterFold]>; // reg
 
 // BinOpMR_RMW_FF - Instructions like "adc [mem], reg".
 class BinOpMR_RMW_FF opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -813,7 +830,12 @@ class BinOpMR_RMW_FF opcode, string mnemonic, X86TypeInfo typeinfo,
   : BinOpMR,
-            Sched<[WriteADCRMW]>;
+            (implicit EFLAGS)]>, Sched<[WriteADCRMW,
+                                        // base, scale, index, offset, segment
+                                        ReadDefault, ReadDefault, ReadDefault,
+                                        ReadDefault, ReadDefault,
+                                        WriteALU.ReadAfterFold, // reg
+                                        WriteALU.ReadAfterFold]>; // EFLAGS
 
 // BinOpMR_F - Instructions like "cmp [mem], reg".
 class BinOpMR_F opcode, string mnemonic, X86TypeInfo typeinfo,
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 762317425026..91b16ec66ee3 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -32,6 +32,7 @@
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/NoFolder.h"
 #include "llvm/IR/ValueHandle.h"
@@ -250,10 +251,12 @@ Value *AA::getWithType(Value &V, Type &Ty) {
       return Constant::getNullValue(&Ty);
     if (C->getType()->isPointerTy() && Ty.isPointerTy())
       return ConstantExpr::getPointerCast(C, &Ty);
-    if (C->getType()->isIntegerTy() && Ty.isIntegerTy())
-      return ConstantExpr::getTrunc(C, &Ty, /* OnlyIfReduced */ true);
-    if (C->getType()->isFloatingPointTy() && Ty.isFloatingPointTy())
-      return ConstantExpr::getFPTrunc(C, &Ty, /* OnlyIfReduced */ true);
+    if (C->getType()->getPrimitiveSizeInBits() >= Ty.getPrimitiveSizeInBits()) {
+      if (C->getType()->isIntegerTy() && Ty.isIntegerTy())
+        return ConstantExpr::getTrunc(C, &Ty, /* OnlyIfReduced */ true);
+      if (C->getType()->isFloatingPointTy() && Ty.isFloatingPointTy())
+        return ConstantExpr::getFPTrunc(C, &Ty, /* OnlyIfReduced */ true);
+    }
   }
   return nullptr;
 }
@@ -1023,7 +1026,7 @@ bool Attributor::checkForAllUses(function_ref Pred,
   while (!Worklist.empty()) {
     const Use *U = Worklist.pop_back_val();
-    if (!Visited.insert(U).second)
+    if (isa<PHINode>(U->getUser()) && !Visited.insert(U).second)
       continue;
     LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << **U << " in "
                       << *U->getUser() << "\n");
@@ -1925,49 +1928,85 @@ void Attributor::createShallowWrapper(Function &F) {
   NumFnShallowWrappersCreated++;
 }
 
+bool Attributor::isInternalizable(Function &F) {
+  if (F.isDeclaration() || F.hasLocalLinkage() ||
+      GlobalValue::isInterposableLinkage(F.getLinkage()))
+    return false;
+  return true;
+}
+
 Function *Attributor::internalizeFunction(Function &F, bool Force) {
   if (!AllowDeepWrapper && !Force)
     return nullptr;
-  if (F.isDeclaration() || F.hasLocalLinkage() ||
-      GlobalValue::isInterposableLinkage(F.getLinkage()))
+  if (!isInternalizable(F))
     return nullptr;
 
-  Module &M = *F.getParent();
-  FunctionType *FnTy = F.getFunctionType();
+  SmallPtrSet FnSet = {&F};
+  DenseMap InternalizedFns;
+  internalizeFunctions(FnSet, InternalizedFns);
 
-  // create a copy of the current function
-  Function *Copied = Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(),
-                                      F.getName() + ".internalized");
-  ValueToValueMapTy VMap;
-  auto *NewFArgIt = Copied->arg_begin();
-  for (auto &Arg : F.args()) {
-    auto ArgName = Arg.getName();
-    NewFArgIt->setName(ArgName);
-    VMap[&Arg] = &(*NewFArgIt++);
+  return InternalizedFns[&F];
+}
+
+bool Attributor::internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
+                                      DenseMap<Function *, Function *> &FnMap) {
+  for (Function *F : FnSet)
+    if (!Attributor::isInternalizable(*F))
+      return false;
+
+  FnMap.clear();
+  // Generate the internalized version of each function.
+  for (Function *F : FnSet) {
+    Module &M = *F->getParent();
+    FunctionType *FnTy = F->getFunctionType();
+
+    // Create a copy of the current function
+    Function *Copied =
+        Function::Create(FnTy, F->getLinkage(), F->getAddressSpace(),
+                         F->getName() + ".internalized");
+    ValueToValueMapTy VMap;
+    auto *NewFArgIt = Copied->arg_begin();
+    for (auto &Arg : F->args()) {
+      auto ArgName = Arg.getName();
+      NewFArgIt->setName(ArgName);
+      VMap[&Arg] = &(*NewFArgIt++);
+    }
+    SmallVector Returns;
+
+    // Copy the body of the original function to the new one
+    CloneFunctionInto(Copied, F, VMap,
+                      CloneFunctionChangeType::LocalChangesOnly, Returns);
+
+    // Set the linkage and visibility late as CloneFunctionInto has some
+    // implicit requirements.
+    Copied->setVisibility(GlobalValue::DefaultVisibility);
+    Copied->setLinkage(GlobalValue::PrivateLinkage);
+
+    // Copy metadata
+    SmallVector, 1> MDs;
+    F->getAllMetadata(MDs);
+    for (auto MDIt : MDs)
+      if (!Copied->hasMetadata())
+        Copied->addMetadata(MDIt.first, *MDIt.second);
+
+    M.getFunctionList().insert(F->getIterator(), Copied);
+    Copied->setDSOLocal(true);
+    FnMap[F] = Copied;
+  }
-  SmallVector Returns;
 
-  // Copy the body of the original function to the new one
-  CloneFunctionInto(Copied, &F, VMap, CloneFunctionChangeType::LocalChangesOnly,
-                    Returns);
+  // Replace all uses of the old function with the new internalized function
+  // unless the caller is a function that was just internalized.
+  for (Function *F : FnSet) {
+    auto &InternalizedFn = FnMap[F];
+    auto IsNotInternalized = [&](Use &U) -> bool {
+      if (auto *CB = dyn_cast<CallBase>(U.getUser()))
+        return !FnMap.lookup(CB->getCaller());
+      return false;
+    };
+    F->replaceUsesWithIf(InternalizedFn, IsNotInternalized);
+  }
 
-  // Set the linakage and visibility late as CloneFunctionInto has some implicit
-  // requirements.
-  Copied->setVisibility(GlobalValue::DefaultVisibility);
-  Copied->setLinkage(GlobalValue::PrivateLinkage);
-
-  // Copy metadata
-  SmallVector, 1> MDs;
-  F.getAllMetadata(MDs);
-  for (auto MDIt : MDs)
-    if (!Copied->hasMetadata())
-      Copied->addMetadata(MDIt.first, *MDIt.second);
-
-  M.getFunctionList().insert(F.getIterator(), Copied);
-  F.replaceAllUsesWith(Copied);
-  Copied->setDSOLocal(true);
-
-  return Copied;
+  return true;
 }
 
 bool Attributor::isValidFunctionSignatureRewrite(
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 98ce286d5139..3529923a9082 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1149,19 +1149,23 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
     return true;
   };
 
+  /// Helper struct, will support ranges eventually.
+  struct OffsetInfo {
+    int64_t Offset = AA::PointerInfo::OffsetAndSize::Unknown;
+
+    bool operator==(const OffsetInfo &OI) const { return Offset == OI.Offset; }
+  };
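The Unknown sentinel above gives the PHI handling in the next hunk a simple merge rule: a known byte offset survives only while every incoming value agrees. A toy model of that rule (hypothetical helper, not part of the patch):

    #include <cstdint>
    constexpr int64_t UnknownOffset = -1; // stands in for OffsetAndSize::Unknown
    int64_t mergePhiOffset(int64_t Known, int64_t Incoming) {
      // Agreeing known offsets are kept; any disagreement or an
      // already-unknown input conservatively degrades to Unknown.
      if (Known == UnknownOffset || Incoming == UnknownOffset)
        return UnknownOffset;
      return Known == Incoming ? Known : UnknownOffset;
    }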
+
   /// See AbstractAttribute::updateImpl(...).
   ChangeStatus updateImpl(Attributor &A) override {
     using namespace AA::PointerInfo;
     State S = getState();
     ChangeStatus Changed = ChangeStatus::UNCHANGED;
     Value &AssociatedValue = getAssociatedValue();
-    struct OffsetInfo {
-      int64_t Offset = 0;
-    };
 
     const DataLayout &DL = A.getDataLayout();
     DenseMap OffsetInfoMap;
-    OffsetInfoMap[&AssociatedValue] = {};
+    OffsetInfoMap[&AssociatedValue] = OffsetInfo{0};
 
     auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo &PtrOI,
                                      bool &Follow) {
@@ -1219,8 +1223,48 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
       Follow = true;
       return true;
     }
-    if (isa(Usr) || isa(Usr) || isa(Usr))
+    if (isa(Usr) || isa(Usr))
       return HandlePassthroughUser(Usr, PtrOI, Follow);
+
+    // For PHIs we need to take care of the recurrence explicitly as the value
+    // might change while we iterate through a loop. For now, we give up if
+    // the PHI is not invariant.
+    if (isa<PHINode>(Usr)) {
+      // Check if the PHI is invariant (so far).
+      OffsetInfo &UsrOI = OffsetInfoMap[Usr];
+      if (UsrOI == PtrOI)
+        return true;
+
+      // Check if the PHI operand already has an unknown offset as we can't
+      // improve on that anymore.
+      if (PtrOI.Offset == OffsetAndSize::Unknown) {
+        UsrOI = PtrOI;
+        Follow = true;
+        return true;
+      }
+
+      // Check if the PHI operand is not dependent on the PHI itself.
+      APInt Offset(DL.getIndexTypeSizeInBits(AssociatedValue.getType()), 0);
+      if (&AssociatedValue == CurPtr->stripAndAccumulateConstantOffsets(
+                                  DL, Offset, /* AllowNonInbounds */ true)) {
+        if (Offset != PtrOI.Offset) {
+          LLVM_DEBUG(dbgs()
+                     << "[AAPointerInfo] PHI operand pointer offset mismatch "
+                     << *CurPtr << " in " << *Usr << "\n");
+          return false;
+        }
+        return HandlePassthroughUser(Usr, PtrOI, Follow);
+      }
+
+      // TODO: Approximate in case we know the direction of the recurrence.
+      LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex "
+                        << *CurPtr << " in " << *Usr << "\n");
+      UsrOI = PtrOI;
+      UsrOI.Offset = OffsetAndSize::Unknown;
+      Follow = true;
+      return true;
+    }
+
     if (auto *LoadI = dyn_cast<LoadInst>(Usr))
       return handleAccess(A, *LoadI, *CurPtr, /* Content */ nullptr,
                           AccessKind::AK_READ, PtrOI.Offset, Changed,
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index b80349352719..d6b97915ede6 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -4176,28 +4176,32 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
     ORE.emit([&]() {
       OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "OMP140", &F);
       return ORA << "Could not internalize function. "
-                 << "Some optimizations may not be possible.";
+                 << "Some optimizations may not be possible. [OMP140]";
     });
   };
 
   // Create internal copies of each function if this is a kernel Module. This
   // allows interprocedural passes to see every call edge.
-  DenseSet InternalizedFuncs;
-  if (isOpenMPDevice(M))
+  DenseMap<Function *, Function *> InternalizedMap;
+  if (isOpenMPDevice(M)) {
+    SmallPtrSet InternalizeFns;
     for (Function &F : M)
       if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) &&
           !DisableInternalization) {
-        if (Attributor::internalizeFunction(F, /* Force */ true)) {
-          InternalizedFuncs.insert(&F);
+        if (Attributor::isInternalizable(F)) {
+          InternalizeFns.insert(&F);
         } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) {
           EmitRemark(F);
        }
      }
+    Attributor::internalizeFunctions(InternalizeFns, InternalizedMap);
+  }
+
+  // Look at every function in the Module unless it was internalized.
   SmallVector SCC;
   for (Function &F : M)
-    if (!F.isDeclaration() && !InternalizedFuncs.contains(&F))
+    if (!F.isDeclaration() && !InternalizedMap.lookup(&F))
       SCC.push_back(&F);
 
   if (SCC.empty())
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 2b0ef0c5f2cc..c5e14ebf3ae3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5158,6 +5158,83 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
   if (!isa<Constant>(Op1) && Op1Min == Op1Max)
     return new ICmpInst(Pred, Op0, ConstantExpr::getIntegerValue(Ty, Op1Min));
 
+  // Don't break up a clamp pattern -- (min(max X, Y), Z) -- by replacing a
+  // min/max canonical compare with some other compare. That could lead to
+  // conflict with select canonicalization and infinite looping.
+  // FIXME: This constraint may go away if min/max intrinsics are canonical.
+  auto isMinMaxCmp = [&](Instruction &Cmp) {
+    if (!Cmp.hasOneUse())
+      return false;
+    Value *A, *B;
+    SelectPatternFlavor SPF = matchSelectPattern(Cmp.user_back(), A, B).Flavor;
+    if (!SelectPatternResult::isMinOrMax(SPF))
+      return false;
+    return match(Op0, m_MaxOrMin(m_Value(), m_Value())) ||
+           match(Op1, m_MaxOrMin(m_Value(), m_Value()));
+  };
+  if (!isMinMaxCmp(I)) {
+    switch (Pred) {
+    default:
+      break;
+    case ICmpInst::ICMP_ULT: {
+      if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
+        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+      const APInt *CmpC;
+      if (match(Op1, m_APInt(CmpC))) {
+        // A <u C -> A == C-1 if min(A)+1 == C
+        if (*CmpC == Op0Min + 1)
+          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+                              ConstantInt::get(Op1->getType(), *CmpC - 1));
+        // X <u C --> X == 0, if the number of zero bits in the bottom of X
+        // exceeds the log2 of C.
+        if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2())
+          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+                              Constant::getNullValue(Op1->getType()));
+      }
+      break;
+    }
+    case ICmpInst::ICMP_UGT: {
+      if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
+        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+      const APInt *CmpC;
+      if (match(Op1, m_APInt(CmpC))) {
+        // A >u C -> A == C+1 if max(a)-1 == C
+        if (*CmpC == Op0Max - 1)
+          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+                              ConstantInt::get(Op1->getType(), *CmpC + 1));
+        // X >u C --> X != 0, if the number of zero bits in the bottom of X
+        // exceeds the log2 of C.
+        if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits())
+          return new ICmpInst(ICmpInst::ICMP_NE, Op0,
+                              Constant::getNullValue(Op1->getType()));
+      }
+      break;
+    }
+    case ICmpInst::ICMP_SLT: {
+      if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
+        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+      const APInt *CmpC;
+      if (match(Op1, m_APInt(CmpC))) {
+        if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
+          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+                              ConstantInt::get(Op1->getType(), *CmpC - 1));
+      }
+      break;
+    }
+    case ICmpInst::ICMP_SGT: {
+      if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
+        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+      const APInt *CmpC;
+      if (match(Op1, m_APInt(CmpC))) {
+        if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
+          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+                              ConstantInt::get(Op1->getType(), *CmpC + 1));
+      }
+      break;
+    }
+    }
+  }
+
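Two hand-evaluated instances of the ULT folds added above, with assumed known-bits facts; each function body is the replacement compare the transform would emit:

    #include <cstdint>
    bool ultBecomesEq(uint32_t X) {
      // Assuming analysis proved min(X) == 7: (X < 8u) holds only at X == 7,
      // so the compare is rewritten to an equality (*CmpC == Op0Min + 1).
      return X == 7u;
    }
    bool ultOfMultipleOf16(uint32_t X) {
      // Assuming X has >= 4 known trailing zero bits: (X < 16u) holds only at
      // X == 0 (countMinTrailingZeros() >= ceilLogBase2(16)).
      return X == 0u;
    }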
   // Based on the range information we know about the LHS, see if we can
   // simplify this comparison. For example, (x&4) < 8 is always true.
   switch (Pred) {
@@ -5219,21 +5296,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
     if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
-    if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
-      return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
-
-    const APInt *CmpC;
-    if (match(Op1, m_APInt(CmpC))) {
-      // A <u C -> A == C-1 if min(A)+1 == C
-      if (*CmpC == Op0Min + 1)
-        return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
-                            ConstantInt::get(Op1->getType(), *CmpC - 1));
-      // X <u C --> X == 0, if the number of zero bits in the bottom of X
-      // exceeds the log2 of C.
-      if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2())
-        return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
-                            Constant::getNullValue(Op1->getType()));
-    }
     break;
   }
   case ICmpInst::ICMP_UGT: {
@@ -5241,21 +5303,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
     if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= min(B)
       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
-    if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
-      return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
-
-    const APInt *CmpC;
-    if (match(Op1, m_APInt(CmpC))) {
-      // A >u C -> A == C+1 if max(a)-1 == C
-      if (*CmpC == Op0Max - 1)
-        return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
-                            ConstantInt::get(Op1->getType(), *CmpC + 1));
-      // X >u C --> X != 0, if the number of zero bits in the bottom of X
-      // exceeds the log2 of C.
-      if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits())
-        return new ICmpInst(ICmpInst::ICMP_NE, Op0,
-                            Constant::getNullValue(Op1->getType()));
-    }
     break;
   }
   case ICmpInst::ICMP_SLT: {
@@ -5263,14 +5310,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
     if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C)
       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
-    if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
-      return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
-    const APInt *CmpC;
-    if (match(Op1, m_APInt(CmpC))) {
-      if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
-        return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
-                            ConstantInt::get(Op1->getType(), *CmpC - 1));
-    }
     break;
   }
   case ICmpInst::ICMP_SGT: {
@@ -5278,14 +5317,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
       return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
     if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
       return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
-    if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
-      return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
-    const APInt *CmpC;
-    if (match(Op1, m_APInt(CmpC))) {
-      if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
-        return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
-                            ConstantInt::get(Op1->getType(), *CmpC + 1));
-    }
     break;
   }
   case ICmpInst::ICMP_SGE:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index a8474e27383d..80abc775299a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -261,8 +261,8 @@ private:
 
 bool PointerReplacer::collectUsers(Instruction &I) {
   for (auto U : I.users()) {
-    Instruction *Inst = cast<Instruction>(&*U);
-    if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
+    auto *Inst = cast<Instruction>(&*U);
+    if (auto *Load = dyn_cast<LoadInst>(Inst)) {
       if (Load->isVolatile())
         return false;
       Worklist.insert(Load);
@@ -270,7 +270,9 @@ bool PointerReplacer::collectUsers(Instruction &I) {
       Worklist.insert(Inst);
       if (!collectUsers(*Inst))
         return false;
-    } else if (isa<MemTransferInst>(Inst)) {
+    } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
+      if (MI->isVolatile())
+        return false;
       Worklist.insert(Inst);
     } else if (Inst->isLifetimeStartOrEnd()) {
       continue;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index ce2b913dba61..5bbc3c87ca4f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3230,7 +3230,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
     Value *Mask;
     if (match(TrueVal, m_Zero()) &&
         match(FalseVal, m_MaskedLoad(m_Value(), m_Value(), m_Value(Mask),
-                                     m_CombineOr(m_Undef(), m_Zero())))) {
+                                     m_CombineOr(m_Undef(), m_Zero()))) &&
+        (CondVal->getType() == Mask->getType())) {
       // We can remove the select by ensuring the load zeros all lanes the
       // select would have. We determine this by proving there is no overlap
       // between the load and select masks.
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index b585818af595..404852f1dd4d 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1981,6 +1981,9 @@ class LSRInstance {
   /// IV users that belong to profitable IVChains.
   SmallPtrSet IVIncSet;
 
+  /// Induction variables that were generated and inserted by the SCEV Expander.
+  SmallVector ScalarEvolutionIVs;
+
   void OptimizeShadowIV();
   bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
   ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
@@ -2085,6 +2088,9 @@ public:
                TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);
 
   bool getChanged() const { return Changed; }
+  const SmallVectorImpl &getScalarEvolutionIVs() const {
+    return ScalarEvolutionIVs;
+  }
 
   void print_factors_and_types(raw_ostream &OS) const;
   void print_fixups(raw_ostream &OS) const;
@@ -5589,6 +5595,11 @@ void LSRInstance::ImplementSolution(
       GenerateIVChain(Chain, Rewriter, DeadInsts);
     Changed = true;
   }
+
+  for (const WeakVH &IV : Rewriter.getInsertedIVs())
+    if (IV && dyn_cast<Instruction>(&*IV)->getParent())
+      ScalarEvolutionIVs.push_back(IV);
+
   // Clean up after ourselves. This must be done before deleting any
   // instructions.
   Rewriter.clear();
@@ -5859,87 +5870,399 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved();
 }
 
-using EqualValues = SmallVector, 4>;
-using EqualValuesMap =
-    DenseMap>>;
-using LocationMap =
-    DenseMap>;
+struct SCEVDbgValueBuilder {
+  SCEVDbgValueBuilder() = default;
+  SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) {
+    Values = Base.Values;
+    Expr = Base.Expr;
+  }
 
-static void DbgGatherEqualValues(Loop *L, ScalarEvolution &SE,
-                                 EqualValuesMap &DbgValueToEqualSet,
-                                 LocationMap &DbgValueToLocation) {
+  /// The DIExpression as we translate the SCEV.
+  SmallVector Expr;
+  /// The location ops of the DIExpression.
+  SmallVector Values;
+
+  void pushOperator(uint64_t Op) { Expr.push_back(Op); }
+  void pushUInt(uint64_t Operand) { Expr.push_back(Operand); }
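pushValue, defined next, keeps Values free of duplicates and emits each value's position as the DW_OP_LLVM_arg operand. The same find-or-append indexing in miniature, using only standard types:

    #include <algorithm>
    #include <vector>
    unsigned argIndexFor(std::vector<const void *> &Values, const void *V) {
      auto It = std::find(Values.begin(), Values.end(), V);
      if (It != Values.end()) // already referenced: reuse the existing slot
        return static_cast<unsigned>(It - Values.begin());
      Values.push_back(V);    // first reference: append and use the new slot
      return static_cast<unsigned>(Values.size() - 1);
    }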
+
+  /// Add a DW_OP_LLVM_arg to the expression, followed by the index of the value
+  /// in the set of values referenced by the expression.
+  void pushValue(llvm::Value *V) {
+    Expr.push_back(llvm::dwarf::DW_OP_LLVM_arg);
+    auto *It =
+        std::find(Values.begin(), Values.end(), llvm::ValueAsMetadata::get(V));
+    unsigned ArgIndex = 0;
+    if (It != Values.end()) {
+      ArgIndex = std::distance(Values.begin(), It);
+    } else {
+      ArgIndex = Values.size();
+      Values.push_back(llvm::ValueAsMetadata::get(V));
+    }
+    Expr.push_back(ArgIndex);
+  }
+
+  void pushValue(const SCEVUnknown *U) {
+    llvm::Value *V = cast<SCEVUnknown>(U)->getValue();
+    pushValue(V);
+  }
+
+  bool pushConst(const SCEVConstant *C) {
+    if (C->getAPInt().getMinSignedBits() > 64)
+      return false;
+    Expr.push_back(llvm::dwarf::DW_OP_consts);
+    Expr.push_back(C->getAPInt().getSExtValue());
+    return true;
+  }
+
+  /// Several SCEV types are sequences of the same arithmetic operator applied
+  /// to constants and values that may be extended or truncated.
+  bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
+                          uint64_t DwarfOp) {
+    assert((isa<SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
+           "Expected arithmetic SCEV type");
+    bool Success = true;
+    unsigned EmitOperator = 0;
+    for (auto &Op : CommExpr->operands()) {
+      Success &= pushSCEV(Op);
+
+      if (EmitOperator >= 1)
+        pushOperator(DwarfOp);
+      ++EmitOperator;
+    }
+    return Success;
+  }
+
+  // TODO: Identify and omit noop casts.
+  bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) {
+    const llvm::SCEV *Inner = C->getOperand(0);
+    const llvm::Type *Type = C->getType();
+    uint64_t ToWidth = Type->getIntegerBitWidth();
+    bool Success = pushSCEV(Inner);
+    uint64_t CastOps[] = {dwarf::DW_OP_LLVM_convert, ToWidth,
+                          IsSigned ? llvm::dwarf::DW_ATE_signed
+                                   : llvm::dwarf::DW_ATE_unsigned};
+    for (const auto &Op : CastOps)
+      pushOperator(Op);
+    return Success;
+  }
+
+  // TODO: MinMax - although these haven't been encountered in the test suite.
+  bool pushSCEV(const llvm::SCEV *S) {
+    bool Success = true;
+    if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
+      Success &= pushConst(StartInt);
+
+    } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+      if (!U->getValue())
+        return false;
+      pushValue(U->getValue());
+
+    } else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
+      Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
+
+    } else if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
+      Success &= pushSCEV(UDiv->getLHS());
+      Success &= pushSCEV(UDiv->getRHS());
+      pushOperator(llvm::dwarf::DW_OP_div);
+
+    } else if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
+      // Assert if a new and unknown SCEVCastExpr type is encountered.
+      assert((isa<SCEVTruncateExpr>(Cast) || isa<SCEVZeroExtendExpr>(Cast) ||
+              isa<SCEVSignExtendExpr>(Cast) || isa<SCEVPtrToIntExpr>(Cast)) &&
+             "Unexpected cast type in SCEV.");
+      Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));
+
+    } else if (const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {
+      Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
+
+    } else if (isa<SCEVAddRecExpr>(S)) {
+      // Nested SCEVAddRecExpr are generated by nested loops and are currently
+      // unsupported.
+      return false;
+
+    } else {
+      return false;
+    }
+    return Success;
+  }
+
+  void setFinalExpression(llvm::DbgValueInst &DI, const DIExpression *OldExpr) {
+    // Re-state assumption that this dbg.value is not variadic. Any remaining
+    // opcodes in its expression operate on a single value already on the
+    // expression stack. Prepend our operations, which will re-compute and
+    // place that value on the expression stack.
+    assert(!DI.hasArgList());
+    auto *NewExpr =
+        DIExpression::prependOpcodes(OldExpr, Expr, /*StackValue*/ true);
+    DI.setExpression(NewExpr);
+
+    auto ValArrayRef = llvm::ArrayRef(Values);
+    DI.setRawLocation(llvm::DIArgList::get(DI.getContext(), ValArrayRef));
+  }
+
+  /// If a DVI can be emitted without a DIArgList, omit DW_OP_llvm_arg and the
+  /// location op index 0.
+  void setShortFinalExpression(llvm::DbgValueInst &DI,
+                               const DIExpression *OldExpr) {
+    assert((Expr[0] == llvm::dwarf::DW_OP_LLVM_arg && Expr[1] == 0) &&
+           "Expected DW_OP_llvm_arg and 0.");
+    DI.replaceVariableLocationOp(
+        0u, llvm::MetadataAsValue::get(DI.getContext(), Values[0]));
+
+    // See setFinalExpression: prepend our opcodes on the start of any old
+    // expression opcodes.
+    assert(!DI.hasArgList());
+    llvm::SmallVector FinalExpr(Expr.begin() + 2, Expr.end());
+    auto *NewExpr =
+        DIExpression::prependOpcodes(OldExpr, FinalExpr, /*StackValue*/ true);
+    DI.setExpression(NewExpr);
+  }
+
+  /// Once the IV and variable SCEV translation is complete, write it to the
+  /// source DVI.
+  void applyExprToDbgValue(llvm::DbgValueInst &DI,
+                           const DIExpression *OldExpr) {
+    assert(!Expr.empty() && "Unexpected empty expression.");
+    // Emit a simpler form if only a single location is referenced.
+    if (Values.size() == 1 && Expr[0] == llvm::dwarf::DW_OP_LLVM_arg &&
+        Expr[1] == 0) {
+      setShortFinalExpression(DI, OldExpr);
+    } else {
+      setFinalExpression(DI, OldExpr);
+    }
+  }
+
+  /// Return true if the combination of arithmetic operator and underlying
+  /// SCEV constant value is an identity function.
+  bool isIdentityFunction(uint64_t Op, const SCEV *S) {
+    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+      if (C->getAPInt().getMinSignedBits() > 64)
+        return false;
+      int64_t I = C->getAPInt().getSExtValue();
+      switch (Op) {
+      case llvm::dwarf::DW_OP_plus:
+      case llvm::dwarf::DW_OP_minus:
+        return I == 0;
+      case llvm::dwarf::DW_OP_mul:
+      case llvm::dwarf::DW_OP_div:
+        return I == 1;
+      }
+    }
+    return false;
+  }
+
+  /// Convert a SCEV of a value to a DIExpression that is pushed onto the
+  /// builder's expression stack. The stack should already contain an
+  /// expression for the iteration count, so that it can be multiplied by
+  /// the stride and added to the start.
+  /// Components of the expression are omitted if they are an identity function.
+  /// Chain (non-affine) SCEVs are not supported.
+  bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
+    assert(SAR.isAffine() && "Expected affine SCEV");
+    // TODO: Is this check needed?
+    if (isa<SCEVAddRecExpr>(SAR.getStart()))
+      return false;
+
+    const SCEV *Start = SAR.getStart();
+    const SCEV *Stride = SAR.getStepRecurrence(SE);
+
+    // Skip pushing arithmetic noops.
+    if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
+      if (!pushSCEV(Stride))
+        return false;
+      pushOperator(llvm::dwarf::DW_OP_mul);
+    }
+    if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
+      if (!pushSCEV(Start))
+        return false;
+      pushOperator(llvm::dwarf::DW_OP_plus);
+    }
+    return true;
+  }
+
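SCEVToValueExpr above and the iteration-count variant defined next are two halves of one identity: for an affine IV {start,+,stride}, the iteration number is recovered from the surviving IV and the lost value is rebuilt from it. In plain signed arithmetic (a sketch, ignoring the DWARF encoding):

    #include <cstdint>
    int64_t salvagedValue(int64_t IV, int64_t Start, int64_t Stride) {
      int64_t Iter = (IV - Start) / Stride; // SCEVToIterCountExpr
      return Iter * Stride + Start;         // SCEVToValueExpr on top of Iter
    }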
+  /// Convert a SCEV of a value to a DIExpression that is pushed onto the
+  /// builder's expression stack. The stack should already contain an
+  /// expression for the iteration count, so that it can be multiplied by
+  /// the stride and added to the start.
+  /// Components of the expression are omitted if they are an identity function.
+  bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,
+                           ScalarEvolution &SE) {
+    assert(SAR.isAffine() && "Expected affine SCEV");
+    if (isa<SCEVAddRecExpr>(SAR.getStart())) {
+      LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV. Unsupported nested AddRec: "
                        << SAR << '\n');
+      return false;
+    }
+    const SCEV *Start = SAR.getStart();
+    const SCEV *Stride = SAR.getStepRecurrence(SE);
+
+    // Skip pushing arithmetic noops.
+    if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
+      if (!pushSCEV(Start))
+        return false;
+      pushOperator(llvm::dwarf::DW_OP_minus);
+    }
+    if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
+      if (!pushSCEV(Stride))
+        return false;
+      pushOperator(llvm::dwarf::DW_OP_div);
+    }
+    return true;
+  }
+};
+
+struct DVIRecoveryRec {
+  DbgValueInst *DVI;
+  DIExpression *Expr;
+  Metadata *LocationOp;
+  const llvm::SCEV *SCEV;
+};
+
+static bool RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI,
+                                     const SCEVDbgValueBuilder &IterationCount,
+                                     ScalarEvolution &SE) {
+  // LSR may add locations to previously single location-op DVIs which
+  // are currently not supported.
+  if (CachedDVI.DVI->getNumVariableLocationOps() != 1)
+    return false;
+
+  // SCEVs for SSA values are most frequently of the form
+  // {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
+  // This is because %a is a PHI node that is not the IV. However, these
+  // SCEVs have not been observed to result in debuginfo-lossy optimisations,
+  // so it's not expected this point will be reached.
+  if (!isa<SCEVAddRecExpr>(CachedDVI.SCEV))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "scev-salvage: Value to salvage SCEV: "
+                    << *CachedDVI.SCEV << '\n');
+
+  const auto *Rec = cast<SCEVAddRecExpr>(CachedDVI.SCEV);
+  if (!Rec->isAffine())
+    return false;
+
+  // Initialise a new builder with the iteration count expression. In
+  // combination with the value's SCEV this enables recovery.
+  SCEVDbgValueBuilder RecoverValue(IterationCount);
+  if (!RecoverValue.SCEVToValueExpr(*Rec, SE))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "scev-salvage: Updating: " << *CachedDVI.DVI << '\n');
+  RecoverValue.applyExprToDbgValue(*CachedDVI.DVI, CachedDVI.Expr);
+  LLVM_DEBUG(dbgs() << "scev-salvage: to: " << *CachedDVI.DVI << '\n');
+  return true;
+}
+
+static bool
+DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE,
+                          llvm::PHINode *LSRInductionVar,
+                          SmallVector &DVIToUpdate) {
+  if (DVIToUpdate.empty())
+    return false;
+
+  const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar);
+  assert(SCEVInductionVar &&
+         "Anticipated a SCEV for the post-LSR induction variable");
+
+  bool Changed = false;
+  if (const SCEVAddRecExpr *IVAddRec =
+          dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
+    if (!IVAddRec->isAffine())
+      return false;
+
+    SCEVDbgValueBuilder IterCountExpr;
+    IterCountExpr.pushValue(LSRInductionVar);
+    if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
+      return false;
+
+    LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
+                      << '\n');
+
+    // Needn't salvage if the location op hasn't been undef'd by LSR.
+    for (auto &DVIRec : DVIToUpdate) {
+      if (!DVIRec.DVI->isUndef())
+        continue;
+
+      // Some DVIs that were single location-op when cached are now multi-op,
+      // due to LSR optimisations. However, multi-op salvaging is not yet
+      // supported by SCEV salvaging. But, we can attempt a salvage by restoring
+      // the pre-LSR single-op expression.
+      if (DVIRec.DVI->hasArgList()) {
+        if (!DVIRec.DVI->getVariableLocationOp(0))
+          continue;
+        llvm::Type *Ty = DVIRec.DVI->getVariableLocationOp(0)->getType();
+        DVIRec.DVI->setRawLocation(
+            llvm::ValueAsMetadata::get(UndefValue::get(Ty)));
+        DVIRec.DVI->setExpression(DVIRec.Expr);
+      }
+
+      Changed |= RewriteDVIUsingIterCount(DVIRec, IterCountExpr, SE);
+    }
+  }
+  return Changed;
+}
+
+/// Identify and cache salvageable DVI locations and expressions along with the
+/// corresponding SCEV(s). Also ensure that the DVI is not deleted before the
+/// salvaging attempt is made.
+static void
+DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE,
+                       SmallVector &SalvageableDVISCEVs,
+                       SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) {
   for (auto &B : L->getBlocks()) {
     for (auto &I : *B) {
       auto DVI = dyn_cast<DbgValueInst>(&I);
       if (!DVI)
         continue;
-      for (unsigned Idx = 0; Idx < DVI->getNumVariableLocationOps(); ++Idx) {
-        // TODO: We can duplicate results if the same arg appears more than
-        // once.
-        Value *V = DVI->getVariableLocationOp(Idx);
-        if (!V || !SE.isSCEVable(V->getType()))
-          continue;
-        auto DbgValueSCEV = SE.getSCEV(V);
-        EqualValues EqSet;
-        for (PHINode &Phi : L->getHeader()->phis()) {
-          if (V->getType() != Phi.getType())
-            continue;
-          if (!SE.isSCEVable(Phi.getType()))
-            continue;
-          auto PhiSCEV = SE.getSCEV(&Phi);
-          Optional Offset =
-              SE.computeConstantDifference(DbgValueSCEV, PhiSCEV);
-          if (Offset && Offset->getMinSignedBits() <= 64)
-            EqSet.emplace_back(
-                std::make_tuple(&Phi, Offset.getValue().getSExtValue()));
-        }
-        DbgValueToEqualSet[DVI].push_back({Idx, std::move(EqSet)});
-        // If we fall back to using this raw location, at least one location op
-        // must be dead. A DIArgList will automatically undef arguments when
-        // they become unavailable, but a ValueAsMetadata will not; since we
-        // know the value should be undef, we use the undef value directly here.
-        Metadata *RawLocation =
-            DVI->hasArgList() ? DVI->getRawLocation()
-                              : ValueAsMetadata::get(UndefValue::get(
-                                    DVI->getVariableLocationOp(0)->getType()));
-        DbgValueToLocation[DVI] = {DVI->getExpression(), RawLocation};
-      }
+
+      if (DVI->hasArgList())
+        continue;
+
+      if (!DVI->getVariableLocationOp(0) ||
+          !SE.isSCEVable(DVI->getVariableLocationOp(0)->getType()))
+        continue;
+
+      SalvageableDVISCEVs.push_back(
+          {DVI, DVI->getExpression(), DVI->getRawLocation(),
+           SE.getSCEV(DVI->getVariableLocationOp(0))});
+      DVIHandles.insert(DVI);
     }
   }
 }
 
-static void DbgApplyEqualValues(EqualValuesMap &DbgValueToEqualSet,
-                                LocationMap &DbgValueToLocation) {
-  for (auto A : DbgValueToEqualSet) {
-    auto *DVI = A.first;
-    // Only update those that are now undef.
-    if (!DVI->isUndef())
+/// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback
+/// any PHI from the loop header is usable, but may have less chance of
+/// surviving subsequent transforms.
+static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
+                                           const LSRInstance &LSR) {
+  // For now, just pick the first IV generated and inserted. Ideally pick an IV
+  // that is unlikely to be optimised away by subsequent transforms.
+  for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
+    if (!IV)
       continue;
-    // The dbg.value may have had its value or expression changed during LSR by
-    // a failed salvage attempt; refresh them from the map.
-    auto *DbgDIExpr = DbgValueToLocation[DVI].first;
-    DVI->setRawLocation(DbgValueToLocation[DVI].second);
-    DVI->setExpression(DbgDIExpr);
-    assert(DVI->isUndef() && "dbg.value with non-undef location should not "
-                             "have been modified by LSR.");
-    for (auto IdxEV : A.second) {
-      unsigned Idx = IdxEV.first;
-      for (auto EV : IdxEV.second) {
-        auto EVHandle = std::get(EV);
-        if (!EVHandle)
-          continue;
-        int64_t Offset = std::get(EV);
-        DVI->replaceVariableLocationOp(Idx, EVHandle);
-        if (Offset) {
-          SmallVector Ops;
-          DIExpression::appendOffset(Ops, Offset);
-          DbgDIExpr = DIExpression::appendOpsToArg(DbgDIExpr, Ops, Idx, true);
-        }
-        DVI->setExpression(DbgDIExpr);
-        break;
-      }
+
+    assert(isa<PHINode>(&*IV) && "Expected PHI node.");
+    if (SE.isSCEVable((*IV).getType())) {
+      PHINode *Phi = dyn_cast<PHINode>(&*IV);
+      LLVM_DEBUG(dbgs() << "scev-salvage: IV : " << *IV
+                        << "with SCEV: " << *SE.getSCEV(Phi) << "\n");
+      return Phi;
     }
   }
+
+  for (PHINode &Phi : L.getHeader()->phis()) {
+    if (!SE.isSCEVable(Phi.getType()))
+      continue;
+
+    const llvm::SCEV *PhiSCEV = SE.getSCEV(&Phi);
+    if (const llvm::SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(PhiSCEV))
+      if (!Rec->isAffine())
+        continue;
+
+    LLVM_DEBUG(dbgs() << "scev-salvage: Selected IV from loop header: " << Phi
+                      << " with SCEV: " << *PhiSCEV << "\n");
+    return &Phi;
+  }
+  return nullptr;
 }
 
 static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
@@ -5948,20 +6271,21 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo &TLI,
                                MemorySSA *MSSA) {
 
+  // Debug preservation - before we start removing anything identify which DVIs
+  // meet the salvageable criteria and store their DIExpression and SCEVs.
+  SmallVector SalvageableDVI;
+  SmallSet<AssertingVH<DbgValueInst>, 2> DVIHandles;
+  DbgGatherSalvagableDVI(L, SE, SalvageableDVI, DVIHandles);
+
   bool Changed = false;
   std::unique_ptr<MemorySSAUpdater> MSSAU;
   if (MSSA)
     MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
 
   // Run the main LSR transformation.
-  Changed |=
-      LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get()).getChanged();
-
-  // Debug preservation - before we start removing anything create equivalence
-  // sets for the llvm.dbg.value intrinsics.
-  EqualValuesMap DbgValueToEqualSet;
-  LocationMap DbgValueToLocation;
-  DbgGatherEqualValues(L, SE, DbgValueToEqualSet, DbgValueToLocation);
+  const LSRInstance &Reducer =
+      LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
+  Changed |= Reducer.getChanged();
 
   // Remove any extra phis created by processing inner loops.
   Changed |= DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
@@ -5981,8 +6305,22 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
     }
   }
 
-  DbgApplyEqualValues(DbgValueToEqualSet, DbgValueToLocation);
+  if (SalvageableDVI.empty())
+    return Changed;
 
+  // Obtain relevant IVs and attempt to rewrite the salvageable DVIs with
+  // expressions composed using the derived iteration count.
+  // TODO: Allow for multiple IV references for nested AddRecSCEVs
+  for (auto &L : LI) {
+    if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer))
+      DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVI);
+    else {
+      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
+                           "could not be identified.\n");
+    }
+  }
+
+  DVIHandles.clear();
   return Changed;
 }
 
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 5ec01454e5b2..fe160d5415bd 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2811,10 +2811,11 @@ private:
     if (BeginOffset > NewAllocaBeginOffset ||
         EndOffset < NewAllocaEndOffset)
       return false;
+    // Length must be in range for FixedVectorType.
     auto *C = cast<ConstantInt>(II.getLength());
-    if (C->getBitWidth() > 64)
+    const uint64_t Len = C->getLimitedValue();
+    if (Len > std::numeric_limits<unsigned>::max())
       return false;
-    const auto Len = C->getZExtValue();
     auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
     auto *SrcTy = FixedVectorType::get(Int8Ty, Len);
     return canConvertValue(DL, SrcTy, AllocaTy) &&
diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index 91280762aaa7..bd2b6fafdf2e 100644
--- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/IR/AssemblyAnnotationWriter.h"
@@ -23,6 +24,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
@@ -566,10 +568,18 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter,
   // to ensure we dominate all of our uses. Always insert right before the
   // relevant instruction (terminator, assume), so that we insert in proper
   // order in the case of multiple predicateinfo in the same block.
+  // The number of named values is used to detect if a new declaration was
+  // added. If so, that declaration is tracked so that it can be removed when
+  // the analysis is done. The corner case where a new declaration results in
+  // a name clash and the old name being renamed is not considered as that
+  // represents an invalid module.
   if (isa(ValInfo)) {
     IRBuilder<> B(getBranchTerminator(ValInfo));
+    auto NumDecls = F.getParent()->getNumNamedValues();
     Function *IF = Intrinsic::getDeclaration(
         F.getParent(), Intrinsic::ssa_copy, Op->getType());
+    if (NumDecls != F.getParent()->getNumNamedValues())
+      PI.CreatedDeclarations.insert(IF);
     CallInst *PIC =
         B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
     PI.PredicateMap.insert({PIC, ValInfo});
@@ -581,8 +591,11 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter,
     // Insert the predicate directly after the assume. While it also holds
     // directly before it, assume(i1 true) is not a useful fact.
     IRBuilder<> B(PAssume->AssumeInst->getNextNode());
+    auto NumDecls = F.getParent()->getNumNamedValues();
     Function *IF = Intrinsic::getDeclaration(
         F.getParent(), Intrinsic::ssa_copy, Op->getType());
+    if (NumDecls != F.getParent()->getNumNamedValues())
+      PI.CreatedDeclarations.insert(IF);
     CallInst *PIC = B.CreateCall(IF, Op);
     PI.PredicateMap.insert({PIC, ValInfo});
     Result.Def = PIC;
@@ -761,6 +774,23 @@ PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT,
   Builder.buildPredicateInfo();
 }
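The destructor added next makes the ownership contract explicit: consumers must strip every ssa_copy call before the PredicateInfo object dies, after which the object erases any declarations it created. A usage sketch under that assumption (F, DT and AC supplied by the caller):

    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Transforms/Utils/PredicateInfo.h"
    using namespace llvm;
    void usePredicateInfo(Function &F, DominatorTree &DT, AssumptionCache &AC) {
      PredicateInfo PI(F, DT, AC);
      // ... query PI.getPredicateInfoFor(...) and rewrite uses ...
      // Before PI is destroyed, every ssa_copy call must be replaced with its
      // operand and erased, or the assertion in ~PredicateInfo will fire.
    }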
+// Remove all declarations we created. The PredicateInfo consumers are
+// responsible for removing the ssa_copy calls created.
+PredicateInfo::~PredicateInfo() {
+  // Collect function pointers in set first, as SmallSet uses a SmallVector
+  // internally and we have to remove the asserting value handles first.
+  SmallPtrSet FunctionPtrs;
+  for (auto &F : CreatedDeclarations)
+    FunctionPtrs.insert(&*F);
+  CreatedDeclarations.clear();
+
+  for (Function *F : FunctionPtrs) {
+    assert(F->user_begin() == F->user_end() &&
+           "PredicateInfo consumer did not remove all SSA copies.");
+    F->eraseFromParent();
+  }
+}
+
 Optional PredicateBase::getConstraint() const {
   switch (Type) {
   case PT_Assume:
@@ -827,6 +857,19 @@ void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired();
 }
 
+// Replace ssa_copy calls created by PredicateInfo with their operand.
+static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) {
+  for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) {
+    const auto *PI = PredInfo.getPredicateInfoFor(&Inst);
+    auto *II = dyn_cast<IntrinsicInst>(&Inst);
+    if (!PI || !II || II->getIntrinsicID() != Intrinsic::ssa_copy)
+      continue;
+
+    Inst.replaceAllUsesWith(II->getOperand(0));
+    Inst.eraseFromParent();
+  }
+}
+
 bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
   PredInfo->print(dbgs());
   if (VerifyPredicateInfo)
     PredInfo->verifyPredicateInfo();
+
+  replaceCreatedSSACopys(*PredInfo, F);
   return false;
 }
 
@@ -845,6 +890,7 @@ PreservedAnalyses PredicateInfoPrinterPass::run(Function &F,
   auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC);
   PredInfo->print(OS);
 
+  replaceCreatedSSACopys(*PredInfo, F);
   return PreservedAnalyses::all();
 }
 
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 5af1c37e6197..3978e1e29825 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1393,9 +1393,10 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
   // can ensure that IVIncrement dominates the current uses.
   PostIncLoops = SavedPostIncLoops;
 
-  // Remember this PHI, even in post-inc mode.
+  // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
+  // effective when we are able to use an IV inserted here, so record it.
   InsertedValues.insert(PN);
-
+  InsertedIVs.push_back(PN);
   return PN;
 }
 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f24ae6b100d5..671bc6b5212b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5433,6 +5433,21 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
   // lane 0 demanded or b) are uses which demand only lane 0 of their operand.
   for (auto *BB : TheLoop->blocks())
     for (auto &I : *BB) {
+      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) {
+        switch (II->getIntrinsicID()) {
+        case Intrinsic::sideeffect:
+        case Intrinsic::experimental_noalias_scope_decl:
+        case Intrinsic::assume:
+        case Intrinsic::lifetime_start:
+        case Intrinsic::lifetime_end:
+          if (TheLoop->hasLoopInvariantOperands(&I))
+            addToWorklistIfAllowed(&I);
+          break;
+        default:
+          break;
+        }
+      }
+
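For intuition, the kind of source loop the switch above targets (example assumed; __builtin_assume is a clang builtin): the assume's operands are loop-invariant, so one scalar call per vector iteration preserves the fact without scalarizing it per lane:

    void saxpy(float *a, const float *x, float k, int n) {
      for (int i = 0; i < n; ++i) {
        __builtin_assume(n % 8 == 0); // operands uniform across all lanes
        a[i] = k * x[i] + a[i];
      }
    }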
       // If there's no pointer operand, there's nothing to do.
       auto *Ptr = getLoadStorePointerOperand(&I);
       if (!Ptr)
@@ -8916,6 +8931,37 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
   bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
       [&](ElementCount VF) { return CM.isPredicatedInst(I); }, Range);
 
+  // Even if the instruction is not marked as uniform, there are certain
+  // intrinsic calls that can be effectively treated as such, so we check for
+  // them here. Conservatively, we only do this for scalable vectors, since
+  // for fixed-width VFs we can always fall back on full scalarization.
+  if (!IsUniform && Range.Start.isScalable() && isa<IntrinsicInst>(I)) {
+    switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+    case Intrinsic::assume:
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+      // For scalable vectors if one of the operands is variant then we still
+      // want to mark as uniform, which will generate one instruction for just
+      // the first lane of the vector. We can't scalarize the call in the same
+      // way as for fixed-width vectors because we don't know how many lanes
+      // there are.
+      //
+      // The reasons for doing it this way for scalable vectors are:
+      //   1. For the assume intrinsic generating the instruction for the first
+      //      lane is still better than not generating any at all. For
+      //      example, the input may be a splat across all lanes.
+      //   2. For the lifetime start/end intrinsics the pointer operand only
+      //      does anything useful when the input comes from a stack object,
+      //      which suggests it should always be uniform. For non-stack objects
+      //      the effect is to poison the object, which still allows us to
+      //      remove the call.
+      IsUniform = true;
+      break;
+    default:
+      break;
+    }
+  }
+
   auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()),
                                        IsUniform, IsPredicated);
   setRecipe(I, Recipe);
diff --git a/llvm/tools/llvm-mca/Views/TimelineView.cpp b/llvm/tools/llvm-mca/Views/TimelineView.cpp
index 9a949761bb75..4ecc3015529c 100644
--- a/llvm/tools/llvm-mca/Views/TimelineView.cpp
+++ b/llvm/tools/llvm-mca/Views/TimelineView.cpp
@@ -145,10 +145,11 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
 
   double AverageTime1, AverageTime2, AverageTime3;
   AverageTime1 =
-      (double)Entry.CyclesSpentInSchedulerQueue / CumulativeExecutions;
-  AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / CumulativeExecutions;
-  AverageTime3 =
-      (double)Entry.CyclesSpentAfterWBAndBeforeRetire / CumulativeExecutions;
+      (double)(Entry.CyclesSpentInSchedulerQueue * 10) / CumulativeExecutions;
+  AverageTime2 =
+      (double)(Entry.CyclesSpentInSQWhileReady * 10) / CumulativeExecutions;
+  AverageTime3 = (double)(Entry.CyclesSpentAfterWBAndBeforeRetire * 10) /
+                 CumulativeExecutions;
 
   OS << Executions;
   OS.PadToColumn(13);
@@ -157,18 +158,18 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
   if (!PrintingTotals)
     tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions,
                    BufferSize);
-  OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10);
+  OS << format("%.1f", floor(AverageTime1 + 0.5) / 10);
   OS.PadToColumn(20);
   if (!PrintingTotals)
     tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions,
                    BufferSize);
-  OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10);
+  OS << format("%.1f", floor(AverageTime2 + 0.5) / 10);
   OS.PadToColumn(27);
   if (!PrintingTotals)
     tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire,
                    CumulativeExecutions,
                    getSubTargetInfo().getSchedModel().MicroOpBufferSize);
-  OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10);
+  OS << format("%.1f", floor(AverageTime3 + 0.5) / 10);
 
   if (OS.has_colors())
     OS.resetColor();
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index 162fb38e1eed..dd3e7688d33f 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -344,6 +344,13 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
       // link node as successor of all nodes in the prev_set if any
       npredecessors +=
           __kmp_depnode_link_successor(gtid, thread, task, node, prev_set);
+      if (dep_barrier) {
+        // clean last_out and prev_set if any; don't touch last_set
+        __kmp_node_deref(thread, last_out);
+        info->last_out = NULL;
+        __kmp_depnode_list_free(thread, prev_set);
+        info->prev_set = NULL;
+      }
     } else { // last_set is of different dep kind, make it prev_set
       // link node as successor of all nodes in the last_set
       npredecessors +=
@@ -353,13 +360,21 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
       info->last_out = NULL;
       // clean prev_set if any
       __kmp_depnode_list_free(thread, prev_set);
-      // move last_set to prev_set, new last_set will be allocated
-      info->prev_set = last_set;
+      if (!dep_barrier) {
+        // move last_set to prev_set, new last_set will be allocated
+        info->prev_set = last_set;
+      } else {
+        info->prev_set = NULL;
+        info->last_flag = 0;
+      }
       info->last_set = NULL;
     }
-    info->last_flag = dep->flag; // store dep kind of the last_set
-    info->last_set = __kmp_add_node(thread, info->last_set, node);
-
+    // for dep_barrier last_flag value should remain:
+    // 0 if last_set is empty, unchanged otherwise
+    if (!dep_barrier) {
+      info->last_flag = dep->flag; // store dep kind of the last_set
+      info->last_set = __kmp_add_node(thread, info->last_set, node);
+    }
     // check if we are processing MTX dependency
     if (dep->flag == KMP_DEP_MTX) {
       if (info->mtx_lock == NULL) {
@@ -756,8 +771,6 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
 
   kmp_depnode_t node = {0};
   __kmp_init_node(&node);
-  // the stack owns the node
-  __kmp_node_ref(&node);
 
   if (!__kmp_check_deps(gtid, &node, NULL, &current_task->td_dephash,
                         DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
diff --git a/openmp/runtime/src/kmp_taskdeps.h b/openmp/runtime/src/kmp_taskdeps.h
index d1576dd5b791..73abf07018f3 100644
--- a/openmp/runtime/src/kmp_taskdeps.h
+++ b/openmp/runtime/src/kmp_taskdeps.h
@@ -23,8 +23,7 @@ static inline void __kmp_node_deref(kmp_info_t *thread, kmp_depnode_t *node) {
     return;
 
   kmp_int32 n = KMP_ATOMIC_DEC(&node->dn.nrefs) - 1;
-  // TODO: temporarily disable assertion until the bug with dependences is fixed
-  // KMP_DEBUG_ASSERT(n >= 0);
+  KMP_DEBUG_ASSERT(n >= 0);
   if (n == 0) {
     KMP_ASSERT(node->dn.nrefs == 0);
 #if USE_FAST_MEMORY
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 6c3e2c95cb5a..55e9c307638a 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1441,6 +1441,7 @@ kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
   if (__kmp_enable_hidden_helper) {
     auto &input_flags = reinterpret_cast(flags);
     input_flags.hidden_helper = TRUE;
+    input_flags.tiedness = TASK_UNTIED;
   }
 
   return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,