From a096e0bdf6cfa020569afca490d8e4c9ac8ebb01 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 24 Jan 2018 20:23:48 +0000 Subject: [PATCH 1/6] Vendor import of llvm release_60 branch r323338: https://llvm.org/svn/llvm-project/llvm/branches/release_60@323338 --- cmake/modules/LLVMConfig.cmake.in | 2 + docs/ReleaseNotes.rst | 56 ++++- include/llvm/Analysis/RegionInfoImpl.h | 12 +- .../CodeGen/SelectionDAGAddressAnalysis.h | 2 +- include/llvm/MC/MCCodeView.h | 48 +--- .../llvm/Support/GenericDomTreeConstruction.h | 66 ++++-- .../llvm/Transforms/Vectorize/SLPVectorizer.h | 7 +- lib/CodeGen/CodeGenPrepare.cpp | 7 +- lib/CodeGen/GlobalMerge.cpp | 3 +- lib/CodeGen/PeepholeOptimizer.cpp | 41 ++-- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 116 +++++---- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 4 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 7 +- .../SelectionDAGAddressAnalysis.cpp | 21 +- lib/CodeGen/TargetLoweringBase.cpp | 15 +- lib/Linker/IRMover.cpp | 7 +- lib/MC/MCCodeView.cpp | 69 ++++++ .../AArch64/AArch64InstructionSelector.cpp | 34 +++ lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp | 20 -- lib/Target/PowerPC/PPCISelLowering.cpp | 43 ++++ lib/Target/PowerPC/PPCISelLowering.h | 6 + lib/Target/PowerPC/PPCInstrInfo.td | 12 + lib/Target/X86/AsmParser/X86AsmParser.cpp | 7 + lib/Target/X86/X86ISelLowering.cpp | 62 +++-- lib/Target/X86/X86TargetTransformInfo.cpp | 3 +- lib/Transforms/Scalar/GVNHoist.cpp | 2 +- lib/Transforms/Scalar/StructurizeCFG.cpp | 108 +++------ lib/Transforms/Vectorize/LoopVectorize.cpp | 9 +- lib/Transforms/Vectorize/SLPVectorizer.cpp | 61 +---- .../GlobalISel/select-gv-cmodel-large.mir | 61 +++++ test/CodeGen/AArch64/atomic-ops-lse.ll | 47 +++- test/CodeGen/AMDGPU/multilevel-break.ll | 5 +- test/CodeGen/AMDGPU/nested-loop-conditions.ll | 133 +++++++---- test/CodeGen/ARM/and-load-combine.ll | 14 +- test/CodeGen/ARM/atomic-cmpxchg.ll | 3 +- test/CodeGen/ARM/cmpxchg-O0.ll | 6 +- test/CodeGen/ARM/global-merge-dllexport.ll | 15 ++ 
test/CodeGen/ARM/global-merge-external.ll | 29 ++- test/CodeGen/ARM/peephole-phi.mir | 67 ++++++ test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll | 94 ++++++++ test/CodeGen/PowerPC/atomics-regression.ll | 40 ++++ .../X86/avx512-shuffles/partial_permute.ll | 39 ++++ test/CodeGen/X86/darwin-bzero.ll | 9 +- test/CodeGen/X86/inline-asm-A-constraint.ll | 3 +- test/CodeGen/X86/pr35761.ll | 36 +++ test/CodeGen/X86/pr35972.ll | 20 ++ test/CodeGen/X86/pr37563.ll | 42 ++++ test/CodeGen/X86/var-permute-128.ll | 5 +- test/CodeGen/X86/var-permute-256.ll | 180 ++++++++++++++ test/MC/COFF/cv-inline-linetable.s | 26 +++ test/MC/X86/x86-64.s | 38 ++- .../X86/Inputs/dicompositetype-unique2.ll | 46 ++++ test/ThinLTO/X86/dicompositetype-unique2.ll | 69 ++++++ .../X86/sink-addrmode-select.ll | 19 ++ .../Transforms/GVNHoist/pr35222-hoist-load.ll | 45 ++++ test/Transforms/JumpThreading/ddt-crash3.ll | 43 ++++ test/Transforms/JumpThreading/ddt-crash4.ll | 75 ++++++ test/Transforms/LoopVectorize/pr35773.ll | 53 +++++ .../Transforms/SLPVectorizer/X86/PR35628_1.ll | 74 ++++++ .../Transforms/SLPVectorizer/X86/PR35628_2.ll | 64 +++++ test/Transforms/SLPVectorizer/X86/PR35777.ll | 48 ++++ test/Transforms/SLPVectorizer/X86/PR35865.ll | 27 +++ .../X86/insert-element-build-vector.ll | 220 +++++++++--------- .../SLPVectorizer/X86/insertvalue.ll | 162 +++++++++++-- .../Transforms/SLPVectorizer/X86/value-bug.ll | 48 +++- .../AMDGPU/backedge-id-bug-xfail.ll | 77 ++++++ .../StructurizeCFG/AMDGPU/backedge-id-bug.ll | 163 +++++++++++++ .../StructurizeCFG/AMDGPU/lit.local.cfg | 2 + .../StructurizeCFG/nested-loop-order.ll | 83 ++++--- .../tools/llvm-readobj/macho-needed-libs.test | 26 +++ tools/llvm-readobj/MachODumper.cpp | 30 +++ .../IR/DominatorTreeBatchUpdatesTest.cpp | 95 ++++++++ unittests/IR/DominatorTreeTest.cpp | 25 ++ utils/release/test-release.sh | 10 +- 74 files changed, 2673 insertions(+), 593 deletions(-) create mode 100644 test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir create mode 
100644 test/CodeGen/ARM/global-merge-dllexport.ll create mode 100644 test/CodeGen/ARM/peephole-phi.mir create mode 100644 test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll create mode 100644 test/CodeGen/X86/pr35761.ll create mode 100644 test/CodeGen/X86/pr35972.ll create mode 100644 test/CodeGen/X86/pr37563.ll create mode 100644 test/ThinLTO/X86/Inputs/dicompositetype-unique2.ll create mode 100644 test/ThinLTO/X86/dicompositetype-unique2.ll create mode 100644 test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll create mode 100644 test/Transforms/JumpThreading/ddt-crash3.ll create mode 100644 test/Transforms/JumpThreading/ddt-crash4.ll create mode 100644 test/Transforms/LoopVectorize/pr35773.ll create mode 100644 test/Transforms/SLPVectorizer/X86/PR35628_1.ll create mode 100644 test/Transforms/SLPVectorizer/X86/PR35628_2.ll create mode 100644 test/Transforms/SLPVectorizer/X86/PR35777.ll create mode 100644 test/Transforms/SLPVectorizer/X86/PR35865.ll create mode 100644 test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll create mode 100644 test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll create mode 100644 test/Transforms/StructurizeCFG/AMDGPU/lit.local.cfg create mode 100644 test/tools/llvm-readobj/macho-needed-libs.test diff --git a/cmake/modules/LLVMConfig.cmake.in b/cmake/modules/LLVMConfig.cmake.in index 077201691656..fe4df5278498 100644 --- a/cmake/modules/LLVMConfig.cmake.in +++ b/cmake/modules/LLVMConfig.cmake.in @@ -37,6 +37,8 @@ set(LLVM_ENABLE_THREADS @LLVM_ENABLE_THREADS@) set(LLVM_ENABLE_ZLIB @LLVM_ENABLE_ZLIB@) +set(LLVM_LIBXML2_ENABLED @LLVM_LIBXML2_ENABLED@) + set(LLVM_ENABLE_DIA_SDK @LLVM_ENABLE_DIA_SDK@) set(LLVM_NATIVE_ARCH @LLVM_NATIVE_ARCH@) diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index 41b9cf92d767..8ef9f6b86c51 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -54,6 +54,8 @@ Non-comprehensive list of changes in this release ``DIVariables`` to the instructions in a ``Module``. 
The ``CheckDebugify`` pass determines how much of the metadata is lost. +* Significantly improved quality of CodeView debug info for Windows. + * Note.. .. NOTE @@ -69,10 +71,13 @@ Non-comprehensive list of changes in this release Changes to the LLVM IR ---------------------- -Changes to the ARM Backend --------------------------- +Changes to the ARM Target +------------------------- - During this release ... +During this release the ARM target has: + +* Got support for enabling SjLj exception handling on platforms where it + isn't the default. Changes to the MIPS Target @@ -89,7 +94,10 @@ Changes to the PowerPC Target Changes to the X86 Target ------------------------- - During this release ... +During this release ... + +* Got support for enabling SjLj exception handling on platforms where it + isn't the default. Changes to the AMDGPU Target ----------------------------- @@ -116,8 +124,46 @@ Changes to the C API External Open Source Projects Using LLVM 6 ========================================== -* A project... +JFS - JIT Fuzzing Solver +------------------------ +`JFS `_ is an experimental constraint solver +designed to investigate using coverage guided fuzzing as an incomplete strategy +for solving boolean, BitVector, and floating-point constraints. +It is built on top of LLVM, Clang, LibFuzzer, and Z3. + +The solver works by generating a C++ program where the reachability of an +`abort()` statement is equivalent to finding a satisfying assignment to the +constraints. This program is then compiled by Clang with `SanitizerCoverage +`_ +instrumentation and then fuzzed using :doc:`LibFuzzer `. + +Zig Programming Language +------------------------ + +`Zig `_ is an open-source programming language designed +for robustness, optimality, and clarity. It is intended to replace C. It +provides high level features such as Generics, +Compile Time Function Execution, and Partial Evaluation, yet exposes low level +LLVM IR features such as Aliases. 
Zig uses Clang to provide automatic +import of .h symbols - even inline functions and macros. Zig uses LLD combined +with lazily building compiler-rt to provide out-of-the-box cross-compiling for +all supported targets. + +LDC - the LLVM-based D compiler +------------------------------- + +`D `_ is a language with C-like syntax and static typing. It +pragmatically combines efficiency, control, and modeling power, with safety and +programmer productivity. D supports powerful concepts like Compile-Time Function +Execution (CTFE) and Template Meta-Programming, provides an innovative approach +to concurrency and offers many classical paradigms. + +`LDC `_ uses the frontend from the reference compiler +combined with LLVM as backend to produce efficient native code. LDC targets +x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on ARM +and PowerPC (32/64 bit). Ports to other architectures like AArch64 and MIPS64 +are underway. Additional Information ====================== diff --git a/include/llvm/Analysis/RegionInfoImpl.h b/include/llvm/Analysis/RegionInfoImpl.h index 6e522354dd9b..eb6baac2d5e4 100644 --- a/include/llvm/Analysis/RegionInfoImpl.h +++ b/include/llvm/Analysis/RegionInfoImpl.h @@ -254,23 +254,23 @@ std::string RegionBase::getNameStr() const { template void RegionBase::verifyBBInRegion(BlockT *BB) const { if (!contains(BB)) - llvm_unreachable("Broken region found: enumerated BB not in region!"); + report_fatal_error("Broken region found: enumerated BB not in region!"); BlockT *entry = getEntry(), *exit = getExit(); for (BlockT *Succ : make_range(BlockTraits::child_begin(BB), BlockTraits::child_end(BB))) { if (!contains(Succ) && exit != Succ) - llvm_unreachable("Broken region found: edges leaving the region must go " - "to the exit node!"); + report_fatal_error("Broken region found: edges leaving the region must go " + "to the exit node!"); } if (entry != BB) { for (BlockT *Pred : make_range(InvBlockTraits::child_begin(BB), 
InvBlockTraits::child_end(BB))) { if (!contains(Pred)) - llvm_unreachable("Broken region found: edges entering the region must " - "go to the entry node!"); + report_fatal_error("Broken region found: edges entering the region must " + "go to the entry node!"); } } } @@ -557,7 +557,7 @@ void RegionInfoBase::verifyBBMap(const RegionT *R) const { } else { BlockT *BB = Element->template getNodeAs(); if (getRegionFor(BB) != R) - llvm_unreachable("BB map does not match region nesting"); + report_fatal_error("BB map does not match region nesting"); } } } diff --git a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h index 18e4c7a83def..580606441a9d 100644 --- a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h +++ b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h @@ -56,7 +56,7 @@ class BaseIndexOffset { int64_t &Off); /// Parses tree in Ptr for base, index, offset addresses. - static BaseIndexOffset match(SDValue Ptr, const SelectionDAG &DAG); + static BaseIndexOffset match(LSBaseSDNode *N, const SelectionDAG &DAG); }; } // end namespace llvm diff --git a/include/llvm/MC/MCCodeView.h b/include/llvm/MC/MCCodeView.h index e2249f49c86c..c8f14515ed34 100644 --- a/include/llvm/MC/MCCodeView.h +++ b/include/llvm/MC/MCCodeView.h @@ -177,13 +177,7 @@ class CodeViewContext { unsigned IACol); /// Retreive the function info if this is a valid function id, or nullptr. - MCCVFunctionInfo *getCVFunctionInfo(unsigned FuncId) { - if (FuncId >= Functions.size()) - return nullptr; - if (Functions[FuncId].isUnallocatedFunctionInfo()) - return nullptr; - return &Functions[FuncId]; - } + MCCVFunctionInfo *getCVFunctionInfo(unsigned FuncId); /// Saves the information from the currently parsed .cv_loc directive /// and sets CVLocSeen. 
When the next instruction is assembled an entry @@ -199,50 +193,22 @@ class CodeViewContext { CurrentCVLoc.setIsStmt(IsStmt); CVLocSeen = true; } - void clearCVLocSeen() { CVLocSeen = false; } bool getCVLocSeen() { return CVLocSeen; } + void clearCVLocSeen() { CVLocSeen = false; } + const MCCVLoc &getCurrentCVLoc() { return CurrentCVLoc; } bool isValidCVFileNumber(unsigned FileNumber); /// \brief Add a line entry. - void addLineEntry(const MCCVLineEntry &LineEntry) { - size_t Offset = MCCVLines.size(); - auto I = MCCVLineStartStop.insert( - {LineEntry.getFunctionId(), {Offset, Offset + 1}}); - if (!I.second) - I.first->second.second = Offset + 1; - MCCVLines.push_back(LineEntry); - } + void addLineEntry(const MCCVLineEntry &LineEntry); - std::vector getFunctionLineEntries(unsigned FuncId) { - std::vector FilteredLines; + std::vector getFunctionLineEntries(unsigned FuncId); - auto I = MCCVLineStartStop.find(FuncId); - if (I != MCCVLineStartStop.end()) - for (size_t Idx = I->second.first, End = I->second.second; Idx != End; - ++Idx) - if (MCCVLines[Idx].getFunctionId() == FuncId) - FilteredLines.push_back(MCCVLines[Idx]); - return FilteredLines; - } + std::pair getLineExtent(unsigned FuncId); - std::pair getLineExtent(unsigned FuncId) { - auto I = MCCVLineStartStop.find(FuncId); - // Return an empty extent if there are no cv_locs for this function id. - if (I == MCCVLineStartStop.end()) - return {~0ULL, 0}; - return I->second; - } - - ArrayRef getLinesForExtent(size_t L, size_t R) { - if (R <= L) - return None; - if (L >= MCCVLines.size()) - return None; - return makeArrayRef(&MCCVLines[L], R - L); - } + ArrayRef getLinesForExtent(size_t L, size_t R); /// Emits a line table substream. 
void emitLineTableForFunction(MCObjectStreamer &OS, unsigned FuncId, diff --git a/include/llvm/Support/GenericDomTreeConstruction.h b/include/llvm/Support/GenericDomTreeConstruction.h index 8f801662d0fb..25175fe66aa8 100644 --- a/include/llvm/Support/GenericDomTreeConstruction.h +++ b/include/llvm/Support/GenericDomTreeConstruction.h @@ -628,7 +628,7 @@ struct SemiNCAInfo { DecreasingLevel> Bucket; // Queue of tree nodes sorted by level in descending order. SmallDenseSet Affected; - SmallDenseSet Visited; + SmallDenseMap Visited; SmallVector AffectedQueue; SmallVector VisitedNotAffectedQueue; }; @@ -706,7 +706,7 @@ struct SemiNCAInfo { // algorithm does not really know or use the set of roots and can make a // different (implicit) decision about which nodes within an infinite loop // becomes a root. - if (DT.isVirtualRoot(TN->getIDom())) { + if (TN && !DT.isVirtualRoot(TN->getIDom())) { DEBUG(dbgs() << "Root " << BlockNamePrinter(R) << " is not virtual root's child\n" << "The entire tree needs to be rebuilt\n"); @@ -753,14 +753,16 @@ struct SemiNCAInfo { while (!II.Bucket.empty()) { const TreeNodePtr CurrentNode = II.Bucket.top().second; + const unsigned CurrentLevel = CurrentNode->getLevel(); II.Bucket.pop(); DEBUG(dbgs() << "\tAdding to Visited and AffectedQueue: " << BlockNamePrinter(CurrentNode) << "\n"); - II.Visited.insert(CurrentNode); + + II.Visited.insert({CurrentNode, CurrentLevel}); II.AffectedQueue.push_back(CurrentNode); // Discover and collect affected successors of the current node. - VisitInsertion(DT, BUI, CurrentNode, CurrentNode->getLevel(), NCD, II); + VisitInsertion(DT, BUI, CurrentNode, CurrentLevel, NCD, II); } // Finish by updating immediate dominators and levels. 
@@ -772,13 +774,17 @@ struct SemiNCAInfo { const TreeNodePtr TN, const unsigned RootLevel, const TreeNodePtr NCD, InsertionInfo &II) { const unsigned NCDLevel = NCD->getLevel(); - DEBUG(dbgs() << "Visiting " << BlockNamePrinter(TN) << "\n"); + DEBUG(dbgs() << "Visiting " << BlockNamePrinter(TN) << ", RootLevel " + << RootLevel << "\n"); SmallVector Stack = {TN}; assert(TN->getBlock() && II.Visited.count(TN) && "Preconditions!"); + SmallPtrSet Processed; + do { TreeNodePtr Next = Stack.pop_back_val(); + DEBUG(dbgs() << " Next: " << BlockNamePrinter(Next) << "\n"); for (const NodePtr Succ : ChildrenGetter::Get(Next->getBlock(), BUI)) { @@ -786,19 +792,31 @@ struct SemiNCAInfo { assert(SuccTN && "Unreachable successor found at reachable insertion"); const unsigned SuccLevel = SuccTN->getLevel(); - DEBUG(dbgs() << "\tSuccessor " << BlockNamePrinter(Succ) - << ", level = " << SuccLevel << "\n"); + DEBUG(dbgs() << "\tSuccessor " << BlockNamePrinter(Succ) << ", level = " + << SuccLevel << "\n"); + + // Do not process the same node multiple times. + if (Processed.count(Next) > 0) + continue; // Succ dominated by subtree From -- not affected. // (Based on the lemma 2.5 from the second paper.) if (SuccLevel > RootLevel) { DEBUG(dbgs() << "\t\tDominated by subtree From\n"); - if (II.Visited.count(SuccTN) != 0) - continue; + if (II.Visited.count(SuccTN) != 0) { + DEBUG(dbgs() << "\t\t\talready visited at level " + << II.Visited[SuccTN] << "\n\t\t\tcurrent level " + << RootLevel << ")\n"); + + // A node can be necessary to visit again if we see it again at + // a lower level than before. 
+ if (II.Visited[SuccTN] >= RootLevel) + continue; + } DEBUG(dbgs() << "\t\tMarking visited not affected " << BlockNamePrinter(Succ) << "\n"); - II.Visited.insert(SuccTN); + II.Visited.insert({SuccTN, RootLevel}); II.VisitedNotAffectedQueue.push_back(SuccTN); Stack.push_back(SuccTN); } else if ((SuccLevel > NCDLevel + 1) && @@ -809,6 +827,8 @@ struct SemiNCAInfo { II.Bucket.push({SuccLevel, SuccTN}); } } + + Processed.insert(Next); } while (!Stack.empty()); } @@ -920,21 +940,21 @@ struct SemiNCAInfo { const NodePtr NCDBlock = DT.findNearestCommonDominator(From, To); const TreeNodePtr NCD = DT.getNode(NCDBlock); - // To dominates From -- nothing to do. - if (ToTN == NCD) return; + // If To dominates From -- nothing to do. + if (ToTN != NCD) { + DT.DFSInfoValid = false; - DT.DFSInfoValid = false; + const TreeNodePtr ToIDom = ToTN->getIDom(); + DEBUG(dbgs() << "\tNCD " << BlockNamePrinter(NCD) << ", ToIDom " + << BlockNamePrinter(ToIDom) << "\n"); - const TreeNodePtr ToIDom = ToTN->getIDom(); - DEBUG(dbgs() << "\tNCD " << BlockNamePrinter(NCD) << ", ToIDom " - << BlockNamePrinter(ToIDom) << "\n"); - - // To remains reachable after deletion. - // (Based on the caption under Figure 4. from the second paper.) - if (FromTN != ToIDom || HasProperSupport(DT, BUI, ToTN)) - DeleteReachable(DT, BUI, FromTN, ToTN); - else - DeleteUnreachable(DT, BUI, ToTN); + // To remains reachable after deletion. + // (Based on the caption under Figure 4. from the second paper.) 
+ if (FromTN != ToIDom || HasProperSupport(DT, BUI, ToTN)) + DeleteReachable(DT, BUI, FromTN, ToTN); + else + DeleteUnreachable(DT, BUI, ToTN); + } if (IsPostDom) UpdateRootsAfterUpdate(DT, BUI); } diff --git a/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/include/llvm/Transforms/Vectorize/SLPVectorizer.h index 25f264c4722c..781a628a0974 100644 --- a/include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ b/include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -95,14 +95,9 @@ struct SLPVectorizerPass : public PassInfoMixin { bool tryToVectorizePair(Value *A, Value *B, slpvectorizer::BoUpSLP &R); /// \brief Try to vectorize a list of operands. - /// \@param BuildVector A list of users to ignore for the purpose of - /// scheduling and cost estimation when NeedExtraction - /// is false. /// \returns true if a value was vectorized. bool tryToVectorizeList(ArrayRef VL, slpvectorizer::BoUpSLP &R, - ArrayRef BuildVector = None, - bool AllowReorder = false, - bool NeedExtraction = false); + bool AllowReorder = false); /// \brief Try to vectorize a chain that may start at the operands of \p I. bool tryToVectorize(Instruction *I, slpvectorizer::BoUpSLP &R); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 9dc1ab4e6bb5..26ca8d4ee88c 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -2700,8 +2700,13 @@ class AddressingModeCombiner { // we still need to collect it due to original value is different. // And later we will need all original values as anchors during // finding the common Phi node. + // We also must reject the case when base offset is different and + // scale reg is not null, we cannot handle this case due to merge of + // different offsets will be used as ScaleReg. 
if (DifferentField != ExtAddrMode::MultipleFields && - DifferentField != ExtAddrMode::ScaleField) { + DifferentField != ExtAddrMode::ScaleField && + (DifferentField != ExtAddrMode::BaseOffsField || + !NewAddrMode.ScaledReg)) { AddrModes.emplace_back(NewAddrMode); return true; } diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 8b9545da914e..3888226fa059 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -577,7 +577,8 @@ bool GlobalMerge::doInitialization(Module &M) { for (auto &GV : M.globals()) { // Merge is safe for "normal" internal or external globals only if (GV.isDeclaration() || GV.isThreadLocal() || - GV.hasSection() || GV.hasImplicitSection()) + GV.hasSection() || GV.hasImplicitSection() || + GV.hasDLLExportStorageClass()) continue; // It's not safe to merge globals that may be preempted diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 45078081987a..11acbe687a31 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -719,15 +719,14 @@ bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg, CurSrcPair = Pair; ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, !DisableAdvCopyOpt, TII); - ValueTrackerResult Res; - bool ShouldRewrite = false; - do { - // Follow the chain of copies until we reach the top of the use-def chain - // or find a more suitable source. - Res = ValTracker.getNextSource(); + // Follow the chain of copies until we find a more suitable source, a phi + // or have to abort. + while (true) { + ValueTrackerResult Res = ValTracker.getNextSource(); + // Abort at the end of a chain (without finding a suitable source). if (!Res.isValid()) - break; + return false; // Insert the Def -> Use entry for the recently found source. 
ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair); @@ -763,24 +762,19 @@ bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg, if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg)) return false; + // Keep following the chain if the value isn't any better yet. const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg); - ShouldRewrite = TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC, - CurSrcPair.SubReg); - } while (!ShouldRewrite); + if (!TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC, CurSrcPair.SubReg)) + continue; - // Continue looking for new sources... - if (Res.isValid()) - continue; + // We currently cannot deal with subreg operands on PHI instructions + // (see insertPHI()). + if (PHICount > 0 && CurSrcPair.SubReg != 0) + continue; - // Do not continue searching for a new source if the there's at least - // one use-def which cannot be rewritten. - if (!ShouldRewrite) - return false; - } - - if (PHICount >= RewritePHILimit) { - DEBUG(dbgs() << "findNextSource: PHI limit reached\n"); - return false; + // We found a suitable source, and are done with this chain. + break; + } } // If we did not find a more suitable source, there is nothing to optimize. @@ -799,6 +793,9 @@ insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, assert(!SrcRegs.empty() && "No sources to create a PHI instruction?"); const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg); + // NewRC is only correct if no subregisters are involved. findNextSource() + // should have rejected those cases already. 
+ assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand"); unsigned NewVR = MRI->createVirtualRegister(NewRC); MachineBasicBlock *MBB = OrigPHI->getParent(); MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(), diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 81bff4d7eefa..2c6b724c02df 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3842,9 +3842,16 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, EVT ExtVT; if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) && isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) { - // Only add this load if we can make it more narrow. - if (ExtVT.bitsLT(Load->getMemoryVT())) + + // ZEXTLOAD is already small enough. + if (Load->getExtensionType() == ISD::ZEXTLOAD && + ExtVT.bitsGE(Load->getMemoryVT())) + continue; + + // Use LE to convert equal sized loads to zext. + if (ExtVT.bitsLE(Load->getMemoryVT())) Loads.insert(Load); + continue; } return false; @@ -3899,11 +3906,13 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) { if (Loads.size() == 0) return false; + DEBUG(dbgs() << "Backwards propagate AND: "; N->dump()); SDValue MaskOp = N->getOperand(1); // If it exists, fixup the single node we allow in the tree that needs // masking. if (FixupNode) { + DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump()); SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode), FixupNode->getValueType(0), SDValue(FixupNode, 0), MaskOp); @@ -3914,14 +3923,21 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) { // Narrow any constants that need it. 
for (auto *LogicN : NodesWithConsts) { - auto *C = cast(LogicN->getOperand(1)); - SDValue And = DAG.getNode(ISD::AND, SDLoc(C), C->getValueType(0), - SDValue(C, 0), MaskOp); - DAG.UpdateNodeOperands(LogicN, LogicN->getOperand(0), And); + SDValue Op0 = LogicN->getOperand(0); + SDValue Op1 = LogicN->getOperand(1); + + if (isa(Op0)) + std::swap(Op0, Op1); + + SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), + Op1, MaskOp); + + DAG.UpdateNodeOperands(LogicN, Op0, And); } // Create narrow loads. for (auto *Load : Loads) { + DEBUG(dbgs() << "Propagate AND back to: "; Load->dump()); SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0), SDValue(Load, 0), MaskOp); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And); @@ -5209,7 +5225,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return SDValue(); // Loads must share the same base address - BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG); + BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG); int64_t ByteOffsetFromBase = 0; if (!Base) Base = Ptr; @@ -12928,7 +12944,7 @@ void DAGCombiner::getStoreMergeCandidates( StoreSDNode *St, SmallVectorImpl &StoreNodes) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); EVT MemVT = St->getMemoryVT(); SDValue Val = peekThroughBitcast(St->getValue()); @@ -12949,7 +12965,7 @@ void DAGCombiner::getStoreMergeCandidates( EVT LoadVT; if (IsLoadSrc) { auto *Ld = cast(Val); - LBasePtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); + LBasePtr = BaseIndexOffset::match(Ld, DAG); LoadVT = Ld->getMemoryVT(); // Load and store should be the same type. 
if (MemVT != LoadVT) @@ -12968,7 +12984,7 @@ void DAGCombiner::getStoreMergeCandidates( return false; // The Load's Base Ptr must also match if (LoadSDNode *OtherLd = dyn_cast(Val)) { - auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG); + auto LPtr = BaseIndexOffset::match(OtherLd, DAG); if (LoadVT != OtherLd->getMemoryVT()) return false; if (!(LBasePtr.equalBaseIndex(LPtr, DAG))) @@ -12992,7 +13008,7 @@ void DAGCombiner::getStoreMergeCandidates( Val.getOpcode() != ISD::EXTRACT_SUBVECTOR) return false; } - Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG); + Ptr = BaseIndexOffset::match(Other, DAG); return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); }; @@ -13365,7 +13381,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { if (Ld->getMemoryVT() != MemVT) break; - BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG); // If this is not the first ptr that we check. int64_t LdOffset = 0; if (LdBasePtr.getBase().getNode()) { @@ -17432,44 +17448,46 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize(); // Check for BaseIndexOffset matching. - BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG); - BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG); + BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG); + BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG); int64_t PtrDiff; - if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) - return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0)); + if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) { + if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) + return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0)); - // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be - // able to calculate their relative offset if at least one arises - // from an alloca. 
However, these allocas cannot overlap and we - // can infer there is no alias. - if (auto *A = dyn_cast(BasePtr0.getBase())) - if (auto *B = dyn_cast(BasePtr1.getBase())) { - MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); - // If the base are the same frame index but the we couldn't find a - // constant offset, (indices are different) be conservative. - if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) || - !MFI.isFixedObjectIndex(B->getIndex()))) - return false; - } + // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be + // able to calculate their relative offset if at least one arises + // from an alloca. However, these allocas cannot overlap and we + // can infer there is no alias. + if (auto *A = dyn_cast(BasePtr0.getBase())) + if (auto *B = dyn_cast(BasePtr1.getBase())) { + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + // If the base are the same frame index but the we couldn't find a + // constant offset, (indices are different) be conservative. + if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) || + !MFI.isFixedObjectIndex(B->getIndex()))) + return false; + } - bool IsFI0 = isa(BasePtr0.getBase()); - bool IsFI1 = isa(BasePtr1.getBase()); - bool IsGV0 = isa(BasePtr0.getBase()); - bool IsGV1 = isa(BasePtr1.getBase()); - bool IsCV0 = isa(BasePtr0.getBase()); - bool IsCV1 = isa(BasePtr1.getBase()); + bool IsFI0 = isa(BasePtr0.getBase()); + bool IsFI1 = isa(BasePtr1.getBase()); + bool IsGV0 = isa(BasePtr0.getBase()); + bool IsGV1 = isa(BasePtr1.getBase()); + bool IsCV0 = isa(BasePtr0.getBase()); + bool IsCV1 = isa(BasePtr1.getBase()); - // If of mismatched base types or checkable indices we can check - // they do not alias. - if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) || - (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) && - (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) - return false; + // If of mismatched base types or checkable indices we can check + // they do not alias. 
+ if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) || + (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) && + (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) + return false; + } - // If we know required SrcValue1 and SrcValue2 have relatively large alignment - // compared to the size and offset of the access, we may be able to prove they - // do not alias. This check is conservative for now to catch cases created by - // splitting vector types. + // If we know required SrcValue1 and SrcValue2 have relatively large + // alignment compared to the size and offset of the access, we may be able + // to prove they do not alias. This check is conservative for now to catch + // cases created by splitting vector types. int64_t SrcValOffset0 = Op0->getSrcValueOffset(); int64_t SrcValOffset1 = Op1->getSrcValueOffset(); unsigned OrigAlignment0 = Op0->getOriginalAlignment(); @@ -17479,8 +17497,8 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0; int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1; - // There is no overlap between these relatively aligned accesses of similar - // size. Return no alias. + // There is no overlap between these relatively aligned accesses of + // similar size. Return no alias. if ((OffAlign0 + NumBytes0) <= OffAlign1 || (OffAlign1 + NumBytes1) <= OffAlign0) return false; @@ -17643,7 +17661,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); // We must have a base and an offset. if (!BasePtr.getBase().getNode()) @@ -17669,7 +17687,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { break; // Find the base pointer and offset for this memory node. 
- BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG); + BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG); // Check that the base pointer is the same as the original one. if (!BasePtr.equalBaseIndex(Ptr, DAG)) diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index bb1dc17b7a1b..b566c232cbc3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2965,12 +2965,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::ZERO_EXTEND: LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res, DAG.getValueType(AtomicType)); - RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2)); + RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType); ExtRes = LHS; break; case ISD::ANY_EXTEND: LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType); - RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2)); + RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType); break; default: llvm_unreachable("Invalid atomic op extension"); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 4c8b63d2f239..3ffc6fa9a059 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7947,11 +7947,8 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, if (VT.getSizeInBits() / 8 != Bytes) return false; - SDValue Loc = LD->getOperand(1); - SDValue BaseLoc = Base->getOperand(1); - - auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this); - auto LocDecomp = BaseIndexOffset::match(Loc, *this); + auto BaseLocDecomp = BaseIndexOffset::match(Base, *this); + auto LocDecomp = BaseIndexOffset::match(LD, *this); int64_t Offset = 0; if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset)) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp 
index d5980919d03c..da1574f60524 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -21,6 +21,9 @@ using namespace llvm; bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other, const SelectionDAG &DAG, int64_t &Off) { + // Conservatively fail if we a match failed.. + if (!Base.getNode() || !Other.Base.getNode()) + return false; // Initial Offset difference. Off = Other.Offset - Offset; @@ -72,13 +75,29 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other, } /// Parses tree in Ptr for base, index, offset addresses. -BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) { +BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N, + const SelectionDAG &DAG) { + SDValue Ptr = N->getBasePtr(); + // (((B + I*M) + c)) + c ... SDValue Base = DAG.getTargetLoweringInfo().unwrapAddress(Ptr); SDValue Index = SDValue(); int64_t Offset = 0; bool IsIndexSignExt = false; + // pre-inc/pre-dec ops are components of EA. + if (N->getAddressingMode() == ISD::PRE_INC) { + if (auto *C = dyn_cast(N->getOffset())) + Offset += C->getSExtValue(); + else // If unknown, give up now. + return BaseIndexOffset(SDValue(), SDValue(), 0, false); + } else if (N->getAddressingMode() == ISD::PRE_DEC) { + if (auto *C = dyn_cast(N->getOffset())) + Offset -= C->getSExtValue(); + else // If unknown, give up now. + return BaseIndexOffset(SDValue(), SDValue(), 0, false); + } + // Consume constant adds & ors with appropriate masking. 
while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) { if (auto *C = dyn_cast(Base->getOperand(1))) { diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 224ae1a3236a..b29a33ac1c14 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -132,9 +132,18 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2"); setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); - // Darwin 10 and higher has an optimized __bzero. - if (!TT.isMacOSX() || !TT.isMacOSXVersionLT(10, 6) || TT.isArch64Bit()) { - setLibcallName(RTLIB::BZERO, TT.isAArch64() ? "bzero" : "__bzero"); + // Some darwins have an optimized __bzero/bzero function. + switch (TT.getArch()) { + case Triple::x86: + case Triple::x86_64: + if (TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 6)) + setLibcallName(RTLIB::BZERO, "__bzero"); + break; + case Triple::aarch64: + setLibcallName(RTLIB::BZERO, "bzero"); + break; + default: + break; } if (darwinHasSinCos(TT)) { diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp index ee067a912e3c..f7170e714b9b 100644 --- a/lib/Linker/IRMover.cpp +++ b/lib/Linker/IRMover.cpp @@ -954,7 +954,12 @@ Expected IRLinker::linkGlobalValueProto(GlobalValue *SGV, NewGV->setLinkage(GlobalValue::InternalLinkage); Constant *C = NewGV; - if (DGV) + // Only create a bitcast if necessary. In particular, with + // DebugTypeODRUniquing we may reach metadata in the destination module + // containing a GV from the source module, in which case SGV will be + // the same as DGV and NewGV, and TypeMap.get() will assert since it + // assumes it is being invoked on a type in the source module. 
+ if (DGV && NewGV != SGV) C = ConstantExpr::getBitCast(NewGV, TypeMap.get(SGV->getType())); if (DGV && NewGV != DGV) { diff --git a/lib/MC/MCCodeView.cpp b/lib/MC/MCCodeView.cpp index 82b81ccc24da..5fd5bde9f1eb 100644 --- a/lib/MC/MCCodeView.cpp +++ b/lib/MC/MCCodeView.cpp @@ -76,6 +76,14 @@ bool CodeViewContext::addFile(MCStreamer &OS, unsigned FileNumber, return true; } +MCCVFunctionInfo *CodeViewContext::getCVFunctionInfo(unsigned FuncId) { + if (FuncId >= Functions.size()) + return nullptr; + if (Functions[FuncId].isUnallocatedFunctionInfo()) + return nullptr; + return &Functions[FuncId]; +} + bool CodeViewContext::recordFunctionId(unsigned FuncId) { if (FuncId >= Functions.size()) Functions.resize(FuncId + 1); @@ -247,6 +255,67 @@ void CodeViewContext::emitFileChecksumOffset(MCObjectStreamer &OS, OS.EmitValueImpl(SRE, 4); } +void CodeViewContext::addLineEntry(const MCCVLineEntry &LineEntry) { + size_t Offset = MCCVLines.size(); + auto I = MCCVLineStartStop.insert( + {LineEntry.getFunctionId(), {Offset, Offset + 1}}); + if (!I.second) + I.first->second.second = Offset + 1; + MCCVLines.push_back(LineEntry); +} + +std::vector +CodeViewContext::getFunctionLineEntries(unsigned FuncId) { + std::vector FilteredLines; + auto I = MCCVLineStartStop.find(FuncId); + if (I != MCCVLineStartStop.end()) { + MCCVFunctionInfo *SiteInfo = getCVFunctionInfo(FuncId); + for (size_t Idx = I->second.first, End = I->second.second; Idx != End; + ++Idx) { + unsigned LocationFuncId = MCCVLines[Idx].getFunctionId(); + if (LocationFuncId == FuncId) { + // This was a .cv_loc directly for FuncId, so record it. + FilteredLines.push_back(MCCVLines[Idx]); + } else { + // Check if the current location is inlined in this function. If it is, + // synthesize a statement .cv_loc at the original inlined call site. 
+ auto I = SiteInfo->InlinedAtMap.find(LocationFuncId); + if (I != SiteInfo->InlinedAtMap.end()) { + MCCVFunctionInfo::LineInfo &IA = I->second; + // Only add the location if it differs from the previous location. + // Large inlined calls will have many .cv_loc entries and we only need + // one line table entry in the parent function. + if (FilteredLines.empty() || + FilteredLines.back().getFileNum() != IA.File || + FilteredLines.back().getLine() != IA.Line || + FilteredLines.back().getColumn() != IA.Col) { + FilteredLines.push_back(MCCVLineEntry( + MCCVLines[Idx].getLabel(), + MCCVLoc(FuncId, IA.File, IA.Line, IA.Col, false, false))); + } + } + } + } + } + return FilteredLines; +} + +std::pair CodeViewContext::getLineExtent(unsigned FuncId) { + auto I = MCCVLineStartStop.find(FuncId); + // Return an empty extent if there are no cv_locs for this function id. + if (I == MCCVLineStartStop.end()) + return {~0ULL, 0}; + return I->second; +} + +ArrayRef CodeViewContext::getLinesForExtent(size_t L, size_t R) { + if (R <= L) + return None; + if (L >= MCCVLines.size()) + return None; + return makeArrayRef(&MCCVLines[L], R - L); +} + void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS, unsigned FuncId, const MCSymbol *FuncBegin, diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp index c2d3ae31c624..b85b4e082996 100644 --- a/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -868,6 +868,40 @@ bool AArch64InstructionSelector::select(MachineInstr &I, if (OpFlags & AArch64II::MO_GOT) { I.setDesc(TII.get(AArch64::LOADgot)); I.getOperand(1).setTargetFlags(OpFlags); + } else if (TM.getCodeModel() == CodeModel::Large) { + // Materialize the global using movz/movk instructions. 
+ unsigned MovZDstReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + auto InsertPt = std::next(I.getIterator()); + auto MovZ = + BuildMI(MBB, InsertPt, I.getDebugLoc(), TII.get(AArch64::MOVZXi)) + .addDef(MovZDstReg); + MovZ->addOperand(MF, I.getOperand(1)); + MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 | + AArch64II::MO_NC); + MovZ->addOperand(MF, MachineOperand::CreateImm(0)); + constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI); + + auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, + unsigned Offset, unsigned ForceDstReg) { + unsigned DstReg = + ForceDstReg ? ForceDstReg + : MRI.createVirtualRegister(&AArch64::GPR64RegClass); + auto MovI = BuildMI(MBB, InsertPt, MovZ->getDebugLoc(), + TII.get(AArch64::MOVKXi)) + .addDef(DstReg) + .addReg(SrcReg); + MovI->addOperand(MF, MachineOperand::CreateGA( + GV, MovZ->getOperand(1).getOffset(), Flags)); + MovI->addOperand(MF, MachineOperand::CreateImm(Offset)); + constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI); + return DstReg; + }; + unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(), + AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); + DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); + BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg()); + I.eraseFromParent(); + return true; } else { I.setDesc(TII.get(AArch64::MOVaddr)); I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE); diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index 740861851185..f08c50540656 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -821,7 +821,6 @@ namespace llvm { MutableArrayRef NewMask, unsigned Options = None); OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results, MutableArrayRef NewMask); - OpRef zerous(ShuffleMask SM, OpRef Va, ResultStack &Results); OpRef vmuxs(ArrayRef Bytes, OpRef Va, OpRef Vb, ResultStack 
&Results); OpRef vmuxp(ArrayRef Bytes, OpRef Va, OpRef Vb, @@ -1139,25 +1138,6 @@ OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb, return concat(Out[0], Out[1], Results); } -OpRef HvxSelector::zerous(ShuffleMask SM, OpRef Va, ResultStack &Results) { - DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); - - int VecLen = SM.Mask.size(); - SmallVector UsedBytes(VecLen); - bool HasUnused = false; - for (int I = 0; I != VecLen; ++I) { - if (SM.Mask[I] != -1) - UsedBytes[I] = 0xFF; - else - HasUnused = true; - } - if (!HasUnused) - return Va; - SDValue B = getVectorConstant(UsedBytes, SDLoc(Results.InpNode)); - Results.push(Hexagon::V6_vand, getSingleVT(MVT::i8), {Va, OpRef(B)}); - return OpRef::res(Results.top()); -} - OpRef HvxSelector::vmuxs(ArrayRef Bytes, OpRef Va, OpRef Vb, ResultStack &Results) { DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index f9de65fcb1df..f0e8b11a3d9c 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -142,6 +142,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); + // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended. + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); + // PowerPC has an i16 but no i8 (or i1) SEXTLOAD. 
for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); @@ -1154,6 +1157,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::Hi: return "PPCISD::Hi"; case PPCISD::Lo: return "PPCISD::Lo"; case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; + case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8"; + case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16"; case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET"; case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; @@ -8834,6 +8839,42 @@ SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const { return Op; } +// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be +// compared to a value that is atomically loaded (atomic loads zero-extend). +SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP && + "Expecting an atomic compare-and-swap here."); + SDLoc dl(Op); + auto *AtomicNode = cast(Op.getNode()); + EVT MemVT = AtomicNode->getMemoryVT(); + if (MemVT.getSizeInBits() >= 32) + return Op; + + SDValue CmpOp = Op.getOperand(2); + // If this is already correctly zero-extended, leave it alone. + auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits()); + if (DAG.MaskedValueIsZero(CmpOp, HighBits)) + return Op; + + // Clear the high bits of the compare operand. + unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1; + SDValue NewCmpOp = + DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp, + DAG.getConstant(MaskVal, dl, MVT::i32)); + + // Replace the existing compare operand with the properly zero-extended one. 
+ SmallVector Ops; + for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++) + Ops.push_back(AtomicNode->getOperand(i)); + Ops[2] = NewCmpOp; + MachineMemOperand *MMO = AtomicNode->getMemOperand(); + SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other); + auto NodeTy = + (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16; + return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO); +} + SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -9325,6 +9366,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerREM(Op, DAG); case ISD::BSWAP: return LowerBSWAP(Op, DAG); + case ISD::ATOMIC_CMP_SWAP: + return LowerATOMIC_CMP_SWAP(Op, DAG); } } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index b119e5b4a564..b3215a84829e 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -430,6 +430,11 @@ namespace llvm { /// The 4xf32 load used for v4i1 constants. QVLFSb, + /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes + /// except they ensure that the compare input is zero-extended for + /// sub-word versions because the atomic loads zero-extend. + ATOMIC_CMP_SWAP_8, ATOMIC_CMP_SWAP_16, + /// GPRC = TOC_ENTRY GA, TOC /// Loads the entry for GA from the TOC, where the TOC base is given by /// the last operand. 
@@ -955,6 +960,7 @@ namespace llvm { SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index a932d05b24ee..43dcc4479cf0 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -257,6 +257,13 @@ def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>; def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr, [SDNPHasChain, SDNPOptInGlue]>; +// PPC-specific atomic operations. +def PPCatomicCmpSwap_8 : + SDNode<"PPCISD::ATOMIC_CMP_SWAP_8", SDTAtomic3, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def PPCatomicCmpSwap_16 : + SDNode<"PPCISD::ATOMIC_CMP_SWAP_16", SDTAtomic3, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, @@ -1710,6 +1717,11 @@ let usesCustomInserter = 1 in { } } +def : Pat<(PPCatomicCmpSwap_8 xoaddr:$ptr, i32:$old, i32:$new), + (ATOMIC_CMP_SWAP_I8 xoaddr:$ptr, i32:$old, i32:$new)>; +def : Pat<(PPCatomicCmpSwap_16 xoaddr:$ptr, i32:$old, i32:$new), + (ATOMIC_CMP_SWAP_I16 xoaddr:$ptr, i32:$old, i32:$new)>; + // Instructions to support atomic operations let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in { def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src), diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index f1ce430f3323..f2ffba7d5418 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ 
b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -2375,6 +2375,13 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE) .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible) Flags |= Prefix; + if (getLexer().is(AsmToken::EndOfStatement)) { + // We don't have real instr with the given prefix + // let's use the prefix as the instr. + // TODO: there could be several prefixes one after another + Flags = X86::IP_NO_PREFIX; + break; + } Name = Parser.getTok().getString(); Parser.Lex(); // eat the prefix // Hack: we could have something like "rep # some comment" or diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a6f56877bd64..e7d9334abe14 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7893,8 +7893,14 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG, IndicesVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()), VT.getVectorNumElements()); IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT); - return DAG.getNode(VT == MVT::v16i8 ? X86ISD::PSHUFB : X86ISD::VPERMV, - SDLoc(V), VT, IndicesVec, SrcVec); + if (SrcVec.getValueSizeInBits() < IndicesVT.getSizeInBits()) { + SrcVec = + DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(SrcVec), VT, DAG.getUNDEF(VT), + SrcVec, DAG.getIntPtrConstant(0, SDLoc(SrcVec))); + } + if (VT == MVT::v16i8) + return DAG.getNode(X86ISD::PSHUFB, SDLoc(V), VT, SrcVec, IndicesVec); + return DAG.getNode(X86ISD::VPERMV, SDLoc(V), VT, IndicesVec, SrcVec); } SDValue @@ -18262,6 +18268,18 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); } + // For v64i1 without 64-bit support we need to split and rejoin. 
+ if (VT == MVT::v64i1 && !Subtarget.is64Bit()) { + assert(Subtarget.hasBWI() && "Expected BWI to be legal"); + SDValue Op1Lo = extractSubVector(Op1, 0, DAG, DL, 32); + SDValue Op2Lo = extractSubVector(Op2, 0, DAG, DL, 32); + SDValue Op1Hi = extractSubVector(Op1, 32, DAG, DL, 32); + SDValue Op2Hi = extractSubVector(Op2, 32, DAG, DL, 32); + SDValue Lo = DAG.getSelect(DL, MVT::v32i1, Cond, Op1Lo, Op2Lo); + SDValue Hi = DAG.getSelect(DL, MVT::v32i1, Cond, Op1Hi, Op2Hi); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + } + if (VT.isVector() && VT.getVectorElementType() == MVT::i1) { SDValue Op1Scalar; if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode())) @@ -28652,13 +28670,14 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, } } + SDValue NewV1 = V1; // Save operand in case early exit happens. if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, - V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, - ShuffleVT) && + NewV1, DL, DAG, Subtarget, Shuffle, + ShuffleSrcVT, ShuffleVT) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 1 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! - Res = DAG.getBitcast(ShuffleSrcVT, V1); + Res = DAG.getBitcast(ShuffleSrcVT, NewV1); DCI.AddToWorklist(Res.getNode()); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res); DCI.AddToWorklist(Res.getNode()); @@ -28680,33 +28699,36 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, } } + SDValue NewV1 = V1; // Save operands in case early exit happens. + SDValue NewV2 = V2; if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, - V1, V2, DL, DAG, Subtarget, Shuffle, + NewV1, NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT, UnaryShuffle) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 1 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! 
- V1 = DAG.getBitcast(ShuffleSrcVT, V1); - DCI.AddToWorklist(V1.getNode()); - V2 = DAG.getBitcast(ShuffleSrcVT, V2); - DCI.AddToWorklist(V2.getNode()); - Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2); + NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1); + DCI.AddToWorklist(NewV1.getNode()); + NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2); + DCI.AddToWorklist(NewV2.getNode()); + Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2); DCI.AddToWorklist(Res.getNode()); return DAG.getBitcast(RootVT, Res); } - if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, - AllowIntDomain, V1, V2, DL, DAG, - Subtarget, Shuffle, ShuffleVT, - PermuteImm) && + NewV1 = V1; // Save operands in case early exit happens. + NewV2 = V2; + if (matchBinaryPermuteVectorShuffle( + MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1, + NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 1 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! - V1 = DAG.getBitcast(ShuffleVT, V1); - DCI.AddToWorklist(V1.getNode()); - V2 = DAG.getBitcast(ShuffleVT, V2); - DCI.AddToWorklist(V2.getNode()); - Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2, + NewV1 = DAG.getBitcast(ShuffleVT, NewV1); + DCI.AddToWorklist(NewV1.getNode()); + NewV2 = DAG.getBitcast(ShuffleVT, NewV2); + DCI.AddToWorklist(NewV2.getNode()); + Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2, DAG.getConstant(PermuteImm, DL, MVT::i8)); DCI.AddToWorklist(Res.getNode()); return DAG.getBitcast(RootVT, Res); diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 223eed3048db..967d67a84bc0 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -754,7 +754,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, // type remains the same. 
if (Kind == TTI::SK_PermuteSingleSrc && LT.first != 1) { MVT LegalVT = LT.second; - if (LegalVT.getVectorElementType().getSizeInBits() == + if (LegalVT.isVector() && + LegalVT.getVectorElementType().getSizeInBits() == Tp->getVectorElementType()->getPrimitiveSizeInBits() && LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) { diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp index c0cd1ea74a74..026fab5dbd3b 100644 --- a/lib/Transforms/Scalar/GVNHoist.cpp +++ b/lib/Transforms/Scalar/GVNHoist.cpp @@ -648,7 +648,7 @@ class GVNHoist { // track in a CHI. In the PDom walk, there can be values in the // stack which are not control dependent e.g., nested loop. if (si != RenameStack.end() && si->second.size() && - DT->dominates(Pred, si->second.back()->getParent())) { + DT->properlyDominates(Pred, si->second.back()->getParent())) { C.Dest = BB; // Assign the edge C.I = si->second.pop_back_val(); // Assign the argument DEBUG(dbgs() << "\nCHI Inserted in BB: " << C.Dest->getName() diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index b8fb80b6cc26..525425bd0f0c 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -14,7 +14,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/DivergenceAnalysis.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" #include "llvm/Analysis/RegionPass.h" @@ -177,9 +176,8 @@ class StructurizeCFG : public RegionPass { Region *ParentRegion; DominatorTree *DT; - LoopInfo *LI; - SmallVector Order; + std::deque Order; BBSet Visited; BBPhiMap DeletedPhis; @@ -204,7 +202,7 @@ class StructurizeCFG : public RegionPass { void gatherPredicates(RegionNode *N); - void collectInfos(); + void analyzeNode(RegionNode *N); void insertConditions(bool Loops); @@ -258,7 +256,6 @@ class StructurizeCFG : public RegionPass 
{ AU.addRequired(); AU.addRequiredID(LowerSwitchID); AU.addRequired(); - AU.addRequired(); AU.addPreserved(); RegionPass::getAnalysisUsage(AU); @@ -292,55 +289,17 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) { /// \brief Build up the general order of nodes void StructurizeCFG::orderNodes() { - ReversePostOrderTraversal RPOT(ParentRegion); - SmallDenseMap LoopBlocks; + assert(Visited.empty()); + assert(Predicates.empty()); + assert(Loops.empty()); + assert(LoopPreds.empty()); - // The reverse post-order traversal of the list gives us an ordering close - // to what we want. The only problem with it is that sometimes backedges - // for outer loops will be visited before backedges for inner loops. - for (RegionNode *RN : RPOT) { - BasicBlock *BB = RN->getEntry(); - Loop *Loop = LI->getLoopFor(BB); - ++LoopBlocks[Loop]; + // This must be RPO order for the back edge detection to work + for (RegionNode *RN : ReversePostOrderTraversal(ParentRegion)) { + // FIXME: Is there a better order to use for structurization? + Order.push_back(RN); + analyzeNode(RN); } - - unsigned CurrentLoopDepth = 0; - Loop *CurrentLoop = nullptr; - for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { - BasicBlock *BB = (*I)->getEntry(); - unsigned LoopDepth = LI->getLoopDepth(BB); - - if (is_contained(Order, *I)) - continue; - - if (LoopDepth < CurrentLoopDepth) { - // Make sure we have visited all blocks in this loop before moving back to - // the outer loop. - - auto LoopI = I; - while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) { - LoopI++; - BasicBlock *LoopBB = (*LoopI)->getEntry(); - if (LI->getLoopFor(LoopBB) == CurrentLoop) { - --BlockCount; - Order.push_back(*LoopI); - } - } - } - - CurrentLoop = LI->getLoopFor(BB); - if (CurrentLoop) - LoopBlocks[CurrentLoop]--; - - CurrentLoopDepth = LoopDepth; - Order.push_back(*I); - } - - // This pass originally used a post-order traversal and then operated on - // the list in reverse. 
Now that we are using a reverse post-order traversal - // rather than re-working the whole pass to operate on the list in order, - // we just reverse the list and continue to operate on it in reverse. - std::reverse(Order.begin(), Order.end()); } /// \brief Determine the end of the loops @@ -466,32 +425,19 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) { } /// \brief Collect various loop and predicate infos -void StructurizeCFG::collectInfos() { - // Reset predicate - Predicates.clear(); +void StructurizeCFG::analyzeNode(RegionNode *RN) { + DEBUG(dbgs() << "Visiting: " + << (RN->isSubRegion() ? "SubRegion with entry: " : "") + << RN->getEntry()->getName() << '\n'); - // and loop infos - Loops.clear(); - LoopPreds.clear(); + // Analyze all the conditions leading to a node + gatherPredicates(RN); - // Reset the visited nodes - Visited.clear(); + // Remember that we've seen this node + Visited.insert(RN->getEntry()); - for (RegionNode *RN : reverse(Order)) { - DEBUG(dbgs() << "Visiting: " - << (RN->isSubRegion() ? "SubRegion with entry: " : "") - << RN->getEntry()->getName() << " Loop Depth: " - << LI->getLoopDepth(RN->getEntry()) << "\n"); - - // Analyze all the conditions leading to a node - gatherPredicates(RN); - - // Remember that we've seen this node - Visited.insert(RN->getEntry()); - - // Find the last back edges - analyzeLoops(RN); - } + // Find the last back edges + analyzeLoops(RN); } /// \brief Insert the missing branch conditions @@ -664,7 +610,7 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) { LLVMContext &Context = Func->getContext(); BasicBlock *Insert = Order.empty() ? 
ParentRegion->getExit() : - Order.back()->getEntry(); + Order.front()->getEntry(); BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, Func, Insert); DT->addNewBlock(Flow, Dominator); @@ -744,7 +690,8 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { /// Take one node from the order vector and wire it up void StructurizeCFG::wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd) { - RegionNode *Node = Order.pop_back_val(); + RegionNode *Node = Order.front(); + Order.pop_front(); Visited.insert(Node->getEntry()); if (isPredictableTrue(Node)) { @@ -768,7 +715,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed, PrevNode = Node; while (!Order.empty() && !Visited.count(LoopEnd) && - dominatesPredicates(Entry, Order.back())) { + dominatesPredicates(Entry, Order.front())) { handleLoops(false, LoopEnd); } @@ -779,7 +726,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed, void StructurizeCFG::handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd) { - RegionNode *Node = Order.back(); + RegionNode *Node = Order.front(); BasicBlock *LoopStart = Node->getEntry(); if (!Loops.count(LoopStart)) { @@ -924,10 +871,9 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) { ParentRegion = R; DT = &getAnalysis().getDomTree(); - LI = &getAnalysis().getLoopInfo(); orderNodes(); - collectInfos(); + createFlow(); insertConditions(false); insertConditions(true); diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 6ef54385c452..64f206ea92eb 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2630,9 +2630,12 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI( Instruction *LastInduction = VecInd; for (unsigned Part = 0; Part < UF; ++Part) { VectorLoopValueMap.setVectorValue(EntryVal, Part, LastInduction); - recordVectorLoopValueForInductionCast(II, LastInduction, Part); + if (isa(EntryVal)) addMetadata(LastInduction, EntryVal); + else + 
recordVectorLoopValueForInductionCast(II, LastInduction, Part); + LastInduction = cast(addFastMathFlag( Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"))); } @@ -2754,15 +2757,17 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) { // If we haven't yet vectorized the induction variable, splat the scalar // induction variable, and build the necessary step vectors. + // TODO: Don't do it unless the vectorized IV is really required. if (!VectorizedIV) { Value *Broadcasted = getBroadcastInstrs(ScalarIV); for (unsigned Part = 0; Part < UF; ++Part) { Value *EntryPart = getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode()); VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart); - recordVectorLoopValueForInductionCast(ID, EntryPart, Part); if (Trunc) addMetadata(EntryPart, Trunc); + else + recordVectorLoopValueForInductionCast(ID, EntryPart, Part); } } diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index a7ccd3faec44..f301fc361abc 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1347,7 +1347,6 @@ void BoUpSLP::buildTree(ArrayRef Roots, DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane " << Lane << " from " << *Scalar << ".\n"); ExternalUses.emplace_back(Scalar, nullptr, Lane); - continue; } for (User *U : Scalar->users()) { DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n"); @@ -4417,13 +4416,11 @@ bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { if (!A || !B) return false; Value *VL[] = { A, B }; - return tryToVectorizeList(VL, R, None, true); + return tryToVectorizeList(VL, R, true); } bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, - ArrayRef BuildVector, - bool AllowReorder, - bool NeedExtraction) { + bool AllowReorder) { if (VL.size() < 2) return false; @@ -4517,12 +4514,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, 
BoUpSLP &R, << "\n"); ArrayRef Ops = VL.slice(I, OpsWidth); - ArrayRef EmptyArray; - ArrayRef BuildVectorSlice; - if (!BuildVector.empty()) - BuildVectorSlice = BuildVector.slice(I, OpsWidth); - - R.buildTree(Ops, NeedExtraction ? EmptyArray : BuildVectorSlice); + R.buildTree(Ops); // TODO: check if we can allow reordering for more cases. if (AllowReorder && R.shouldReorder()) { // Conceptually, there is nothing actually preventing us from trying to @@ -4530,7 +4522,6 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, // reductions. However, at this point, we only expect to get here when // there are exactly two operations. assert(Ops.size() == 2); - assert(BuildVectorSlice.empty()); Value *ReorderedOps[] = {Ops[1], Ops[0]}; R.buildTree(ReorderedOps, None); } @@ -4550,31 +4541,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, << " and with tree size " << ore::NV("TreeSize", R.getTreeSize())); - Value *VectorizedRoot = R.vectorizeTree(); - - // Reconstruct the build vector by extracting the vectorized root. This - // way we handle the case where some elements of the vector are - // undefined. - // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2)) - if (!BuildVectorSlice.empty()) { - // The insert point is the last build vector instruction. The - // vectorized root will precede it. This guarantees that we get an - // instruction. The vectorized tree could have been constant folded. - Instruction *InsertAfter = cast(BuildVectorSlice.back()); - unsigned VecIdx = 0; - for (auto &V : BuildVectorSlice) { - IRBuilder Builder(InsertAfter->getParent(), - ++BasicBlock::iterator(InsertAfter)); - Instruction *I = cast(V); - assert(isa(I) || isa(I)); - Instruction *Extract = - cast(Builder.CreateExtractElement( - VectorizedRoot, Builder.getInt32(VecIdx++))); - I->setOperand(1, Extract); - I->moveAfter(Extract); - InsertAfter = I; - } - } + R.vectorizeTree(); // Move to the next bundle. 
I += VF - 1; NextInst = I + 1; @@ -5495,11 +5462,9 @@ class HorizontalReduction { /// /// Returns true if it matches static bool findBuildVector(InsertElementInst *LastInsertElem, - SmallVectorImpl &BuildVector, SmallVectorImpl &BuildVectorOpds) { Value *V = nullptr; do { - BuildVector.push_back(LastInsertElem); BuildVectorOpds.push_back(LastInsertElem->getOperand(1)); V = LastInsertElem->getOperand(0); if (isa(V)) @@ -5508,7 +5473,6 @@ static bool findBuildVector(InsertElementInst *LastInsertElem, if (!LastInsertElem || !LastInsertElem->hasOneUse()) return false; } while (true); - std::reverse(BuildVector.begin(), BuildVector.end()); std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end()); return true; } @@ -5517,11 +5481,9 @@ static bool findBuildVector(InsertElementInst *LastInsertElem, /// /// \return true if it matches. static bool findBuildAggregate(InsertValueInst *IV, - SmallVectorImpl &BuildVector, SmallVectorImpl &BuildVectorOpds) { Value *V; do { - BuildVector.push_back(IV); BuildVectorOpds.push_back(IV->getInsertedValueOperand()); V = IV->getAggregateOperand(); if (isa(V)) @@ -5530,7 +5492,6 @@ static bool findBuildAggregate(InsertValueInst *IV, if (!IV || !IV->hasOneUse()) return false; } while (true); - std::reverse(BuildVector.begin(), BuildVector.end()); std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end()); return true; } @@ -5706,27 +5667,25 @@ bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI, if (!R.canMapToVector(IVI->getType(), DL)) return false; - SmallVector BuildVector; SmallVector BuildVectorOpds; - if (!findBuildAggregate(IVI, BuildVector, BuildVectorOpds)) + if (!findBuildAggregate(IVI, BuildVectorOpds)) return false; DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n"); // Aggregate value is unlikely to be processed in vector register, we need to // extract scalars into scalar registers, so NeedExtraction is set true. 
- return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false, true); + return tryToVectorizeList(BuildVectorOpds, R); } bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI, BasicBlock *BB, BoUpSLP &R) { - SmallVector BuildVector; SmallVector BuildVectorOpds; - if (!findBuildVector(IEI, BuildVector, BuildVectorOpds)) + if (!findBuildVector(IEI, BuildVectorOpds)) return false; // Vectorize starting with the build vector operands ignoring the BuildVector // instructions for the purpose of scheduling and user extraction. - return tryToVectorizeList(BuildVectorOpds, R, BuildVector); + return tryToVectorizeList(BuildVectorOpds, R); } bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB, @@ -5804,8 +5763,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { // is done when there are exactly two elements since tryToVectorizeList // asserts that there are only two values when AllowReorder is true. bool AllowReorder = NumElts == 2; - if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, - None, AllowReorder)) { + if (NumElts > 1 && + tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, AllowReorder)) { // Success start over because instructions might have been changed. 
HaveVectorizedPhiNodes = true; Changed = true; diff --git a/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir b/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir new file mode 100644 index 000000000000..12cd832665b3 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir @@ -0,0 +1,61 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-linux-gnu -code-model=large -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + + @foo1 = common global [1073741824 x i32] zeroinitializer, align 4 + @foo2 = common global [1073741824 x i32] zeroinitializer, align 4 + + define i32 @gv_large() { + entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval, align 4 + %0 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0), align 4 + %1 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0), align 4 + %add = add nsw i32 %0, %1 + ret i32 %add + } + +... 
+--- +name: gv_large +legalized: true +regBankSelected: true +stack: + - { id: 0, name: retval, type: default, offset: 0, size: 4, alignment: 4, + stack-id: 0, callee-saved-register: '', callee-saved-restored: true, + di-variable: '', di-expression: '', di-location: '' } +constants: +body: | + bb.1: + ; CHECK-LABEL: name: gv_large + ; CHECK: [[MOVZXi:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @foo1, 0 + ; CHECK: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi]], target-flags(aarch64-g1, aarch64-nc) @foo1, 16 + ; CHECK: [[MOVKXi1:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi]], target-flags(aarch64-g2, aarch64-nc) @foo1, 32 + ; CHECK: [[MOVKXi2:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi1]], target-flags(aarch64-g3) @foo1, 48 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY [[MOVKXi2]] + ; CHECK: [[MOVZXi1:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @foo2, 0 + ; CHECK: [[MOVKXi3:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi1]], target-flags(aarch64-g1, aarch64-nc) @foo2, 16 + ; CHECK: [[MOVKXi4:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi3]], target-flags(aarch64-g2, aarch64-nc) @foo2, 32 + ; CHECK: [[MOVKXi5:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi4]], target-flags(aarch64-g3) @foo2, 48 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[MOVKXi5]] + ; CHECK: STRWui %wzr, %stack.0.retval, 0 :: (store 4 into %ir.retval) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`) + ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`) + ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRWui]], [[LDRWui1]] + ; CHECK: %w0 = COPY [[ADDWrr]] + ; CHECK: RET_ReallyLR implicit %w0 + %1:gpr(s32) = G_CONSTANT i32 0 + %4:gpr(p0) = G_GLOBAL_VALUE @foo1 + %3:gpr(p0) = COPY %4(p0) + %7:gpr(p0) = G_GLOBAL_VALUE @foo2 + %6:gpr(p0) = COPY %7(p0) + %0:gpr(p0) = G_FRAME_INDEX %stack.0.retval + G_STORE 
%1(s32), %0(p0) :: (store 4 into %ir.retval) + %2:gpr(s32) = G_LOAD %3(p0) :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`) + %5:gpr(s32) = G_LOAD %6(p0) :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`) + %8:gpr(s32) = G_ADD %2, %5 + %w0 = COPY %8(s32) + RET_ReallyLR implicit %w0 + +... diff --git a/test/CodeGen/AArch64/atomic-ops-lse.ll b/test/CodeGen/AArch64/atomic-ops-lse.ll index 49f716547b12..1a5cd2dc4233 100644 --- a/test/CodeGen/AArch64/atomic-ops-lse.ll +++ b/test/CodeGen/AArch64/atomic-ops-lse.ll @@ -629,14 +629,29 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { ; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 - -; CHECK: casab w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb +; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 +; CHECK-NEXT: casab w0, w1, [x[[ADDR]]] +; CHECK-NEXT: ret ret i8 %old } +define i1 @test_atomic_cmpxchg_i8_1(i8 %wanted, i8 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i8_1: + %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire + %success = extractvalue { i8, i1 } %pair, 1 + +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 + +; CHECK: casab w[[NEW:[0-9]+]], w1, [x[[ADDR]]] +; CHECK-NEXT: cmp w[[NEW]], w0, uxtb +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + ret i1 %success +} + define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i16: %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire @@ -644,14 +659,30 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { ; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 - -; CHECK: casah w0, w1, [x[[ADDR]]] -; 
CHECK-NOT: dmb +; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 +; CHECK-NEXT: casah w0, w1, [x[[ADDR]]] +; CHECK-NEXT: ret ret i16 %old } +define i1 @test_atomic_cmpxchg_i16_1(i16 %wanted, i16 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i16_1: + %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire + %success = extractvalue { i16, i1 } %pair, 1 + +; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 + +; CHECK: casah w[[NEW:[0-9]+]], w1, [x[[ADDR]]] +; CHECK-NEXT: cmp w[[NEW]], w0, uxth +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + + ret i1 %success +} + define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32: %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new acquire acquire diff --git a/test/CodeGen/AMDGPU/multilevel-break.ll b/test/CodeGen/AMDGPU/multilevel-break.ll index 8cc02d497098..5b556f12f0d6 100644 --- a/test/CodeGen/AMDGPU/multilevel-break.ll +++ b/test/CodeGen/AMDGPU/multilevel-break.ll @@ -66,9 +66,10 @@ ENDIF: ; preds = %LOOP ; OPT-LABEL: define amdgpu_kernel void @multi_if_break_loop( ; OPT: llvm.amdgcn.break +; OPT: llvm.amdgcn.break +; OPT: llvm.amdgcn.if.break +; OPT: llvm.amdgcn.if.break ; OPT: llvm.amdgcn.loop -; OPT: llvm.amdgcn.if.break -; OPT: llvm.amdgcn.if.break ; OPT: llvm.amdgcn.end.cf ; GCN-LABEL: {{^}}multi_if_break_loop: diff --git a/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/test/CodeGen/AMDGPU/nested-loop-conditions.ll index 672549c8ea63..96d2841e685f 100644 --- a/test/CodeGen/AMDGPU/nested-loop-conditions.ll +++ b/test/CodeGen/AMDGPU/nested-loop-conditions.ll @@ -124,55 +124,100 @@ bb23: ; preds = %bb10 ; Earlier version of above, before a run of the structurizer. 
; IR-LABEL: @nested_loop_conditions( -; IR: Flow7: -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %17) -; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %15) -; IR-NEXT: %1 = extractvalue { i1, i64 } %0, 0 -; IR-NEXT: %2 = extractvalue { i1, i64 } %0, 1 -; IR-NEXT: br i1 %1, label %bb4.bb13_crit_edge, label %Flow8 +; IR: %tmp1235 = icmp slt i32 %tmp1134, 9 +; IR: br i1 %tmp1235, label %bb14.lr.ph, label %Flow -; IR: Flow1: -; IR-NEXT: %loop.phi = phi i64 [ %loop.phi9, %Flow6 ], [ %phi.broken, %bb14 ] -; IR-NEXT: %13 = phi <4 x i32> [ %29, %Flow6 ], [ undef, %bb14 ] -; IR-NEXT: %14 = phi i32 [ %30, %Flow6 ], [ undef, %bb14 ] -; IR-NEXT: %15 = phi i1 [ %31, %Flow6 ], [ false, %bb14 ] -; IR-NEXT: %16 = phi i1 [ false, %Flow6 ], [ %8, %bb14 ] -; IR-NEXT: %17 = call i64 @llvm.amdgcn.else.break(i64 %11, i64 %loop.phi) -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11) -; IR-NEXT: %18 = call i1 @llvm.amdgcn.loop(i64 %17) -; IR-NEXT: br i1 %18, label %Flow7, label %bb14 - -; IR: Flow2: -; IR-NEXT: %loop.phi10 = phi i64 [ %loop.phi11, %Flow5 ], [ %12, %bb16 ] -; IR-NEXT: %19 = phi <4 x i32> [ %29, %Flow5 ], [ undef, %bb16 ] -; IR-NEXT: %20 = phi i32 [ %30, %Flow5 ], [ undef, %bb16 ] -; IR-NEXT: %21 = phi i1 [ %31, %Flow5 ], [ false, %bb16 ] -; IR-NEXT: %22 = phi i1 [ false, %Flow5 ], [ false, %bb16 ] -; IR-NEXT: %23 = phi i1 [ false, %Flow5 ], [ %8, %bb16 ] -; IR-NEXT: %24 = call { i1, i64 } @llvm.amdgcn.if(i1 %23) -; IR-NEXT: %25 = extractvalue { i1, i64 } %24, 0 -; IR-NEXT: %26 = extractvalue { i1, i64 } %24, 1 -; IR-NEXT: br i1 %25, label %bb21, label %Flow3 - -; IR: bb21: -; IR: %tmp12 = icmp slt i32 %tmp11, 9 -; IR-NEXT: %27 = xor i1 %tmp12, true -; IR-NEXT: %28 = call i64 @llvm.amdgcn.if.break(i1 %27, i64 %phi.broken) -; IR-NEXT: br label %Flow3 +; IR: bb14.lr.ph: +; IR: br label %bb14 ; IR: Flow3: -; IR-NEXT: %loop.phi11 = phi i64 [ %phi.broken, %bb21 ], [ %phi.broken, %Flow2 ] -; IR-NEXT: %loop.phi9 = phi i64 [ %28, %bb21 ], [ %loop.phi10, %Flow2 ] -; IR-NEXT: %29 
= phi <4 x i32> [ %tmp9, %bb21 ], [ %19, %Flow2 ] -; IR-NEXT: %30 = phi i32 [ %tmp10, %bb21 ], [ %20, %Flow2 ] -; IR-NEXT: %31 = phi i1 [ %27, %bb21 ], [ %21, %Flow2 ] -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %26) -; IR-NEXT: br i1 %22, label %bb31.loopexit, label %Flow4 +; IR: call void @llvm.amdgcn.end.cf(i64 %18) +; IR: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %17) +; IR: %1 = extractvalue { i1, i64 } %0, 0 +; IR: %2 = extractvalue { i1, i64 } %0, 1 +; IR: br i1 %1, label %bb4.bb13_crit_edge, label %Flow4 + +; IR: bb4.bb13_crit_edge: +; IR: br label %Flow4 + +; IR: Flow4: +; IR: %3 = phi i1 [ true, %bb4.bb13_crit_edge ], [ false, %Flow3 ] +; IR: call void @llvm.amdgcn.end.cf(i64 %2) +; IR: br label %Flow + +; IR: bb13: +; IR: br label %bb31 + +; IR: Flow: +; IR: %4 = phi i1 [ %3, %Flow4 ], [ true, %bb ] +; IR: %5 = call { i1, i64 } @llvm.amdgcn.if(i1 %4) +; IR: %6 = extractvalue { i1, i64 } %5, 0 +; IR: %7 = extractvalue { i1, i64 } %5, 1 +; IR: br i1 %6, label %bb13, label %bb31 + +; IR: bb14: +; IR: %phi.broken = phi i64 [ %18, %Flow2 ], [ 0, %bb14.lr.ph ] +; IR: %tmp1037 = phi i32 [ %tmp1033, %bb14.lr.ph ], [ %16, %Flow2 ] +; IR: %tmp936 = phi <4 x i32> [ %tmp932, %bb14.lr.ph ], [ %15, %Flow2 ] +; IR: %tmp15 = icmp eq i32 %tmp1037, 1 +; IR: %8 = xor i1 %tmp15, true +; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8) +; IR: %10 = extractvalue { i1, i64 } %9, 0 +; IR: %11 = extractvalue { i1, i64 } %9, 1 +; IR: br i1 %10, label %bb31.loopexit, label %Flow1 + +; IR: Flow1: +; IR: %12 = call { i1, i64 } @llvm.amdgcn.else(i64 %11) +; IR: %13 = extractvalue { i1, i64 } %12, 0 +; IR: %14 = extractvalue { i1, i64 } %12, 1 +; IR: br i1 %13, label %bb16, label %Flow2 + +; IR: bb16: +; IR: %tmp17 = bitcast i64 %tmp3 to <2 x i32> +; IR: br label %bb18 + +; IR: Flow2: +; IR: %loop.phi = phi i64 [ %21, %bb21 ], [ %phi.broken, %Flow1 ] +; IR: %15 = phi <4 x i32> [ %tmp9, %bb21 ], [ undef, %Flow1 ] +; IR: %16 = phi i32 [ %tmp10, %bb21 ], [ undef, %Flow1 ] +; IR: %17 = 
phi i1 [ %20, %bb21 ], [ false, %Flow1 ] +; IR: %18 = call i64 @llvm.amdgcn.else.break(i64 %14, i64 %loop.phi) +; IR: call void @llvm.amdgcn.end.cf(i64 %14) +; IR: %19 = call i1 @llvm.amdgcn.loop(i64 %18) +; IR: br i1 %19, label %Flow3, label %bb14 + +; IR: bb18: +; IR: %tmp19 = load volatile i32, i32 addrspace(1)* undef +; IR: %tmp20 = icmp slt i32 %tmp19, 9 +; IR: br i1 %tmp20, label %bb21, label %bb18 + +; IR: bb21: +; IR: %tmp22 = extractelement <2 x i32> %tmp17, i64 1 +; IR: %tmp23 = lshr i32 %tmp22, 16 +; IR: %tmp24 = select i1 undef, i32 undef, i32 %tmp23 +; IR: %tmp25 = uitofp i32 %tmp24 to float +; IR: %tmp26 = fmul float %tmp25, 0x3EF0001000000000 +; IR: %tmp27 = fsub float %tmp26, undef +; IR: %tmp28 = fcmp olt float %tmp27, 5.000000e-01 +; IR: %tmp29 = select i1 %tmp28, i64 1, i64 2 +; IR: %tmp30 = extractelement <4 x i32> %tmp936, i64 %tmp29 +; IR: %tmp7 = zext i32 %tmp30 to i64 +; IR: %tmp8 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* undef, i64 %tmp7 +; IR: %tmp9 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp8, align 16 +; IR: %tmp10 = extractelement <4 x i32> %tmp9, i64 0 +; IR: %tmp11 = load volatile i32, i32 addrspace(1)* undef +; IR: %tmp12 = icmp slt i32 %tmp11, 9 +; IR: %20 = xor i1 %tmp12, true +; IR: %21 = call i64 @llvm.amdgcn.if.break(i1 %20, i64 %phi.broken) +; IR: br label %Flow2 + +; IR: bb31.loopexit: +; IR: br label %Flow1 ; IR: bb31: -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %7) -; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef -; IR-NEXT: ret void +; IR: call void @llvm.amdgcn.end.cf(i64 %7) +; IR: store volatile i32 0, i32 addrspace(1)* undef +; IR: ret void ; GCN-LABEL: {{^}}nested_loop_conditions: diff --git a/test/CodeGen/ARM/and-load-combine.ll b/test/CodeGen/ARM/and-load-combine.ll index 2b92778f469d..69b00ed4853a 100644 --- a/test/CodeGen/ARM/and-load-combine.ll +++ b/test/CodeGen/ARM/and-load-combine.ll @@ -852,8 +852,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) { ; ARM: @ %bb.0: @ %entry 
; ARM-NEXT: ldrb r0, [r0] ; ARM-NEXT: uxtb r2, r2 -; ARM-NEXT: and r0, r0, r1 -; ARM-NEXT: uxtb r1, r0 +; ARM-NEXT: and r1, r0, r1 ; ARM-NEXT: mov r0, #0 ; ARM-NEXT: cmp r1, r2 ; ARM-NEXT: movweq r0, #1 @@ -863,8 +862,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) { ; ARMEB: @ %bb.0: @ %entry ; ARMEB-NEXT: ldrb r0, [r0] ; ARMEB-NEXT: uxtb r2, r2 -; ARMEB-NEXT: and r0, r0, r1 -; ARMEB-NEXT: uxtb r1, r0 +; ARMEB-NEXT: and r1, r0, r1 ; ARMEB-NEXT: mov r0, #0 ; ARMEB-NEXT: cmp r1, r2 ; ARMEB-NEXT: movweq r0, #1 @@ -872,9 +870,8 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) { ; ; THUMB1-LABEL: test6: ; THUMB1: @ %bb.0: @ %entry -; THUMB1-NEXT: ldrb r0, [r0] -; THUMB1-NEXT: ands r0, r1 -; THUMB1-NEXT: uxtb r3, r0 +; THUMB1-NEXT: ldrb r3, [r0] +; THUMB1-NEXT: ands r3, r1 ; THUMB1-NEXT: uxtb r2, r2 ; THUMB1-NEXT: movs r0, #1 ; THUMB1-NEXT: movs r1, #0 @@ -889,8 +886,7 @@ define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) { ; THUMB2: @ %bb.0: @ %entry ; THUMB2-NEXT: ldrb r0, [r0] ; THUMB2-NEXT: uxtb r2, r2 -; THUMB2-NEXT: ands r0, r1 -; THUMB2-NEXT: uxtb r1, r0 +; THUMB2-NEXT: ands r1, r0 ; THUMB2-NEXT: movs r0, #0 ; THUMB2-NEXT: cmp r1, r2 ; THUMB2-NEXT: it eq diff --git a/test/CodeGen/ARM/atomic-cmpxchg.ll b/test/CodeGen/ARM/atomic-cmpxchg.ll index a136e44fc196..fec116677085 100644 --- a/test/CodeGen/ARM/atomic-cmpxchg.ll +++ b/test/CodeGen/ARM/atomic-cmpxchg.ll @@ -49,9 +49,10 @@ entry: ; CHECK-THUMBV6: mov [[EXPECTED:r[0-9]+]], r1 ; CHECK-THUMBV6-NEXT: bl __sync_val_compare_and_swap_1 ; CHECK-THUMBV6-NEXT: mov [[RES:r[0-9]+]], r0 +; CHECK-THUMBV6-NEXT: uxtb [[EXPECTED_ZEXT:r[0-9]+]], [[EXPECTED]] ; CHECK-THUMBV6-NEXT: movs r0, #1 ; CHECK-THUMBV6-NEXT: movs [[ZERO:r[0-9]+]], #0 -; CHECK-THUMBV6-NEXT: cmp [[RES]], [[EXPECTED]] +; CHECK-THUMBV6-NEXT: cmp [[RES]], [[EXPECTED_ZEXT]] ; CHECK-THUMBV6-NEXT: beq [[END:.LBB[0-9_]+]] ; CHECK-THUMBV6-NEXT: mov r0, [[ZERO]] ; CHECK-THUMBV6-NEXT: [[END]]: diff --git a/test/CodeGen/ARM/cmpxchg-O0.ll 
b/test/CodeGen/ARM/cmpxchg-O0.ll index f8ad2bbbbe0e..b49378d6702e 100644 --- a/test/CodeGen/ARM/cmpxchg-O0.ll +++ b/test/CodeGen/ARM/cmpxchg-O0.ll @@ -17,7 +17,8 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind { ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: -; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]] +; CHECK: uxtb [[DESIRED_ZEXT:r[0-9]+]], [[DESIRED]] +; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED_ZEXT]] ; CHECK: {{moveq|movweq}} {{r[0-9]+}}, #1 ; CHECK: dmb ish %res = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst monotonic @@ -36,7 +37,8 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: -; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]] +; CHECK: uxth [[DESIRED_ZEXT:r[0-9]+]], [[DESIRED]] +; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED_ZEXT]] ; CHECK: {{moveq|movweq}} {{r[0-9]+}}, #1 ; CHECK: dmb ish %res = cmpxchg i16* %addr, i16 %desired, i16 %new seq_cst monotonic diff --git a/test/CodeGen/ARM/global-merge-dllexport.ll b/test/CodeGen/ARM/global-merge-dllexport.ll new file mode 100644 index 000000000000..680f57d0a17b --- /dev/null +++ b/test/CodeGen/ARM/global-merge-dllexport.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=thumbv7-win32 -arm-global-merge | FileCheck %s + +@x = global i32 0, align 4 +@y = dllexport global i32 0, align 4 + +define void @f1(i32 %a1, i32 %a2) { +; CHECK: f1: +; CHECK: movw [[REG1:r[0-9]+]], :lower16:x +; CHECK: movt [[REG1]], :upper16:x + store i32 %a1, i32* @x, align 4 + store i32 %a2, i32* @y, align 4 + ret void +} + +; CHECK-NOT: .L_MergedGlobals diff --git a/test/CodeGen/ARM/global-merge-external.ll b/test/CodeGen/ARM/global-merge-external.ll index 03c977614320..25bbd0869581 100644 --- a/test/CodeGen/ARM/global-merge-external.ll +++ b/test/CodeGen/ARM/global-merge-external.ll @@ -1,8 +1,9 @@ -; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge | FileCheck %s 
--check-prefix=CHECK-MERGE -; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=true | FileCheck %s --check-prefix=CHECK-MERGE -; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=false | FileCheck %s --check-prefix=CHECK-NO-MERGE -; RUN: llc < %s -mtriple=arm-macho -arm-global-merge | FileCheck %s --check-prefix=CHECK-NO-MERGE -; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -relocation-model=pic | FileCheck %s --check-prefix=CHECK-NO-MERGE +; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge | FileCheck %s --check-prefixes=CHECK,CHECK-MERGE +; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=true | FileCheck %s --check-prefixes=CHECK,CHECK-MERGE +; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=false | FileCheck %s --check-prefixes=CHECK,CHECK-NO-MERGE +; RUN: llc < %s -mtriple=arm-macho -arm-global-merge | FileCheck %s --check-prefixes=CHECK,CHECK-NO-MERGE +; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -relocation-model=pic | FileCheck %s --check-prefixes=CHECK,CHECK-NO-MERGE +; RUN: llc < %s -mtriple=thumbv7-win32 -arm-global-merge | FileCheck %s --check-prefixes=CHECK-WIN32 @x = global i32 0, align 4 @y = global i32 0, align 4 @@ -10,10 +11,13 @@ define void @f1(i32 %a1, i32 %a2) { ;CHECK: f1: -;CHECK: ldr {{r[0-9]+}}, [[LABEL1:\.LCPI[0-9]+_[0-9]]] +;CHECK: ldr {{r[0-9]+}}, [[LABEL1:\.?LCPI[0-9]+_[0-9]]] ;CHECK: [[LABEL1]]: ;CHECK-MERGE: .long .L_MergedGlobals ;CHECK-NO-MERGE: .long {{_?x}} +;CHECK-WIN32: f1: +;CHECK-WIN32: movw [[REG1:r[0-9]+]], :lower16:.L_MergedGlobals +;CHECK-WIN32: movt [[REG1]], :upper16:.L_MergedGlobals store i32 %a1, i32* @x, align 4 store i32 %a2, i32* @y, align 4 ret void @@ -21,10 +25,13 @@ define void @f1(i32 %a1, i32 %a2) { define void @g1(i32 %a1, i32 %a2) { ;CHECK: g1: -;CHECK: ldr {{r[0-9]+}}, [[LABEL2:\.LCPI[0-9]+_[0-9]]] +;CHECK: ldr {{r[0-9]+}}, [[LABEL2:\.?LCPI[0-9]+_[0-9]]] ;CHECK: [[LABEL2]]: ;CHECK-MERGE: 
.long .L_MergedGlobals ;CHECK-NO-MERGE: .long {{_?y}} +;CHECK-WIN32: g1: +;CHECK-WIN32: movw [[REG2:r[0-9]+]], :lower16:.L_MergedGlobals +;CHECK-WIN32: movt [[REG2]], :upper16:.L_MergedGlobals store i32 %a1, i32* @y, align 4 store i32 %a2, i32* @z, align 4 ret void @@ -35,6 +42,7 @@ define void @g1(i32 %a1, i32 %a2) { ;CHECK-MERGE: .type .L_MergedGlobals,%object ;CHECK-MERGE: .local .L_MergedGlobals ;CHECK-MERGE: .comm .L_MergedGlobals,12,4 +;CHECK-WIN32: .lcomm .L_MergedGlobals,12,4 ;CHECK-MERGE: .globl x ;CHECK-MERGE: x = .L_MergedGlobals @@ -45,3 +53,10 @@ define void @g1(i32 %a1, i32 %a2) { ;CHECK-MERGE: .globl z ;CHECK-MERGE: z = .L_MergedGlobals+8 ;CHECK-MERGE: .size z, 4 + +;CHECK-WIN32: .globl x +;CHECK-WIN32: x = .L_MergedGlobals +;CHECK-WIN32: .globl y +;CHECK-WIN32: y = .L_MergedGlobals+4 +;CHECK-WIN32: .globl z +;CHECK-WIN32: z = .L_MergedGlobals+8 diff --git a/test/CodeGen/ARM/peephole-phi.mir b/test/CodeGen/ARM/peephole-phi.mir new file mode 100644 index 000000000000..30343654dea1 --- /dev/null +++ b/test/CodeGen/ARM/peephole-phi.mir @@ -0,0 +1,67 @@ +# RUN: llc -o - %s -mtriple=armv7-- -verify-machineinstrs -run-pass=peephole-opt | FileCheck %s +# +# Make sure we do not crash on this input. +# Note that this input could in principle be optimized, but right now we don't +# have this case implemented so the output should simply be unchanged. 
+# +# CHECK-LABEL: name: func +# CHECK: body: | +# CHECK: bb.0: +# CHECK: Bcc %bb.2, 1, undef %cpsr +# +# CHECK: bb.1: +# CHECK: %0:dpr = IMPLICIT_DEF +# CHECK: %1:gpr, %2:gpr = VMOVRRD %0, 14, %noreg +# CHECK: B %bb.3 +# +# CHECK: bb.2: +# CHECK: %3:spr = IMPLICIT_DEF +# CHECK: %4:gpr = VMOVRS %3, 14, %noreg +# +# CHECK: bb.3: +# CHECK: %5:gpr = PHI %1, %bb.1, %4, %bb.2 +# CHECK: %6:spr = VMOVSR %5, 14, %noreg +--- +name: func0 +tracksRegLiveness: true +body: | + bb.0: + Bcc %bb.2, 1, undef %cpsr + + bb.1: + %0:dpr = IMPLICIT_DEF + %1:gpr, %2:gpr = VMOVRRD %0:dpr, 14, %noreg + B %bb.3 + + bb.2: + %3:spr = IMPLICIT_DEF + %4:gpr = VMOVRS %3:spr, 14, %noreg + + bb.3: + %5:gpr = PHI %1, %bb.1, %4, %bb.2 + %6:spr = VMOVSR %5, 14, %noreg +... + +# CHECK-LABEL: name: func1 +# CHECK: %6:spr = PHI %0, %bb.1, %2, %bb.2 +# CHECK: %7:spr = COPY %6 +--- +name: func1 +tracksRegLiveness: true +body: | + bb.0: + Bcc %bb.2, 1, undef %cpsr + + bb.1: + %1:spr = IMPLICIT_DEF + %0:gpr = VMOVRS %1, 14, %noreg + B %bb.3 + + bb.2: + %3:spr = IMPLICIT_DEF + %2:gpr = VMOVRS %3:spr, 14, %noreg + + bb.3: + %4:gpr = PHI %0, %bb.1, %2, %bb.2 + %5:spr = VMOVSR %4, 14, %noreg +... diff --git a/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll b/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll new file mode 100644 index 000000000000..093899690d07 --- /dev/null +++ b/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll @@ -0,0 +1,94 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Make sure that a negative value for the compare-and-swap is zero extended +; from i8/i16 to i32 since it will be compared for equality. 
+; RUN: llc -mtriple=powerpc64le-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64le-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-P7 + +@str = private unnamed_addr constant [46 x i8] c"FAILED: __atomic_compare_exchange_n() failed.\00" +@str.1 = private unnamed_addr constant [59 x i8] c"FAILED: __atomic_compare_exchange_n() set the wrong value.\00" +@str.2 = private unnamed_addr constant [7 x i8] c"PASSED\00" + +define signext i32 @main() { +; CHECK-LABEL: main: +; CHECK: li 3, -32477 +; CHECK: lis 12, 0 +; CHECK: li 6, 234 +; CHECK: sth 3, 46(1) +; CHECK: ori 4, 12, 33059 +; CHECK: sync +; CHECK: .LBB0_1: # %L.entry +; CHECK: lharx 3, 0, 5 +; CHECK: cmpw 4, 3 +; CHECK: bne 0, .LBB0_3 +; CHECK: sthcx. 6, 0, 5 +; CHECK: bne 0, .LBB0_1 +; CHECK: b .LBB0_4 +; CHECK: .LBB0_3: # %L.entry +; CHECK: sthcx. 3, 0, 5 +; CHECK: .LBB0_4: # %L.entry +; CHECK: cmplwi 3, 33059 +; CHECK: lwsync +; CHECK: lhz 3, 46(1) +; CHECK: cmplwi 3, 234 +; +; CHECK-P7-LABEL: main: +; CHECK-P7: lis 4, 0 +; CHECK-P7: li 7, 0 +; CHECK-P7: li 3, -32477 +; CHECK-P7: sth 3, 46(1) +; CHECK-P7: li 5, 234 +; CHECK-P7: ori 4, 4, 33059 +; CHECK-P7: rlwinm 3, 6, 3, 27, 27 +; CHECK-P7: ori 7, 7, 65535 +; CHECK-P7: sync +; CHECK-P7: slw 8, 5, 3 +; CHECK-P7: slw 5, 7, 3 +; CHECK-P7: slw 9, 4, 3 +; CHECK-P7: and 7, 8, 5 +; CHECK-P7: rldicr 4, 6, 0, 61 +; CHECK-P7: and 8, 9, 5 +; CHECK-P7: .LBB0_1: # %L.entry +; CHECK-P7: lwarx 9, 0, 4 +; CHECK-P7: and 6, 9, 5 +; CHECK-P7: cmpw 0, 6, 8 +; CHECK-P7: bne 0, .LBB0_3 +; CHECK-P7: andc 9, 9, 5 +; CHECK-P7: or 9, 9, 7 +; CHECK-P7: stwcx. 9, 0, 4 +; CHECK-P7: bne 0, .LBB0_1 +; CHECK-P7: b .LBB0_4 +; CHECK-P7: .LBB0_3: # %L.entry +; CHECK-P7: stwcx. 
9, 0, 4 +; CHECK-P7: .LBB0_4: # %L.entry +; CHECK-P7: srw 3, 6, 3 +; CHECK-P7: lwsync +; CHECK-P7: cmplwi 3, 33059 +; CHECK-P7: lhz 3, 46(1) +; CHECK-P7: cmplwi 3, 234 +L.entry: + %value.addr = alloca i16, align 2 + store i16 -32477, i16* %value.addr, align 2 + %0 = cmpxchg i16* %value.addr, i16 -32477, i16 234 seq_cst seq_cst + %1 = extractvalue { i16, i1 } %0, 1 + br i1 %1, label %L.B0000, label %L.B0003 + +L.B0003: ; preds = %L.entry + %puts = call i32 @puts(i8* getelementptr inbounds ([46 x i8], [46 x i8]* @str, i64 0, i64 0)) + ret i32 1 + +L.B0000: ; preds = %L.entry + %2 = load i16, i16* %value.addr, align 2 + %3 = icmp eq i16 %2, 234 + br i1 %3, label %L.B0001, label %L.B0005 + +L.B0005: ; preds = %L.B0000 + %puts1 = call i32 @puts(i8* getelementptr inbounds ([59 x i8], [59 x i8]* @str.1, i64 0, i64 0)) + ret i32 1 + +L.B0001: ; preds = %L.B0000 + %puts2 = call i32 @puts(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @str.2, i64 0, i64 0)) + ret i32 0 +} + +; Function Attrs: nounwind +declare i32 @puts(i8* nocapture readonly) #0 diff --git a/test/CodeGen/PowerPC/atomics-regression.ll b/test/CodeGen/PowerPC/atomics-regression.ll index 7079f6dd52e9..daf55fc426d0 100644 --- a/test/CodeGen/PowerPC/atomics-regression.ll +++ b/test/CodeGen/PowerPC/atomics-regression.ll @@ -404,6 +404,7 @@ define void @test39() { define void @test40(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test40: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: b .LBB40_2 ; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB40_1: @@ -423,6 +424,7 @@ define void @test40(i8* %ptr, i8 %cmp, i8 %val) { define void @test41(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test41: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: .LBB41_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -444,6 +446,7 @@ define void @test41(i8* %ptr, i8 %cmp, i8 %val) { define void @test42(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test42: ; PPC64LE: # 
%bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: .LBB42_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -465,6 +468,7 @@ define void @test42(i8* %ptr, i8 %cmp, i8 %val) { define void @test43(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test43: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: b .LBB43_2 ; PPC64LE-NEXT: .p2align 5 @@ -485,6 +489,7 @@ define void @test43(i8* %ptr, i8 %cmp, i8 %val) { define void @test44(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test44: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: b .LBB44_2 ; PPC64LE-NEXT: .p2align 5 @@ -505,6 +510,7 @@ define void @test44(i8* %ptr, i8 %cmp, i8 %val) { define void @test45(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test45: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB45_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -527,6 +533,7 @@ define void @test45(i8* %ptr, i8 %cmp, i8 %val) { define void @test46(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test46: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB46_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -549,6 +556,7 @@ define void @test46(i8* %ptr, i8 %cmp, i8 %val) { define void @test47(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test47: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB47_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -571,6 +579,7 @@ define void @test47(i8* %ptr, i8 %cmp, i8 %val) { define void @test48(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test48: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB48_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -593,6 +602,7 @@ define void @test48(i8* %ptr, i8 %cmp, i8 %val) { define void @test49(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test49: ; PPC64LE: # %bb.0: +; 
PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB49_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -615,6 +625,7 @@ define void @test49(i8* %ptr, i8 %cmp, i8 %val) { define void @test50(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test50: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: b .LBB50_2 ; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB50_1: @@ -634,6 +645,7 @@ define void @test50(i16* %ptr, i16 %cmp, i16 %val) { define void @test51(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test51: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: .LBB51_1: ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -655,6 +667,7 @@ define void @test51(i16* %ptr, i16 %cmp, i16 %val) { define void @test52(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test52: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: .LBB52_1: ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -676,6 +689,7 @@ define void @test52(i16* %ptr, i16 %cmp, i16 %val) { define void @test53(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test53: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: b .LBB53_2 ; PPC64LE-NEXT: .p2align 5 @@ -696,6 +710,7 @@ define void @test53(i16* %ptr, i16 %cmp, i16 %val) { define void @test54(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test54: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: b .LBB54_2 ; PPC64LE-NEXT: .p2align 5 @@ -716,6 +731,7 @@ define void @test54(i16* %ptr, i16 %cmp, i16 %val) { define void @test55(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test55: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB55_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -738,6 +754,7 @@ define void @test55(i16* %ptr, i16 %cmp, i16 %val) { define void @test56(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: 
test56: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB56_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -760,6 +777,7 @@ define void @test56(i16* %ptr, i16 %cmp, i16 %val) { define void @test57(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test57: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB57_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -782,6 +800,7 @@ define void @test57(i16* %ptr, i16 %cmp, i16 %val) { define void @test58(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test58: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB58_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -804,6 +823,7 @@ define void @test58(i16* %ptr, i16 %cmp, i16 %val) { define void @test59(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test59: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB59_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -1248,6 +1268,7 @@ define void @test79(i64* %ptr, i64 %cmp, i64 %val) { define void @test80(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test80: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: b .LBB80_2 ; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB80_1: @@ -1267,6 +1288,7 @@ define void @test80(i8* %ptr, i8 %cmp, i8 %val) { define void @test81(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test81: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: .LBB81_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -1288,6 +1310,7 @@ define void @test81(i8* %ptr, i8 %cmp, i8 %val) { define void @test82(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test82: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: .LBB82_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -1309,6 +1332,7 @@ define void @test82(i8* %ptr, i8 %cmp, i8 %val) { define void @test83(i8* %ptr, i8 %cmp, i8 %val) 
{ ; PPC64LE-LABEL: test83: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: b .LBB83_2 ; PPC64LE-NEXT: .p2align 5 @@ -1329,6 +1353,7 @@ define void @test83(i8* %ptr, i8 %cmp, i8 %val) { define void @test84(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test84: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: b .LBB84_2 ; PPC64LE-NEXT: .p2align 5 @@ -1349,6 +1374,7 @@ define void @test84(i8* %ptr, i8 %cmp, i8 %val) { define void @test85(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test85: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB85_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -1371,6 +1397,7 @@ define void @test85(i8* %ptr, i8 %cmp, i8 %val) { define void @test86(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test86: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB86_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -1393,6 +1420,7 @@ define void @test86(i8* %ptr, i8 %cmp, i8 %val) { define void @test87(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test87: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB87_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -1415,6 +1443,7 @@ define void @test87(i8* %ptr, i8 %cmp, i8 %val) { define void @test88(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test88: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB88_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -1437,6 +1466,7 @@ define void @test88(i8* %ptr, i8 %cmp, i8 %val) { define void @test89(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test89: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB89_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -1459,6 +1489,7 @@ define void @test89(i8* %ptr, i8 %cmp, i8 %val) { define void @test90(i16* %ptr, i16 %cmp, i16 %val) { ; 
PPC64LE-LABEL: test90: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: b .LBB90_2 ; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB90_1: @@ -1478,6 +1509,7 @@ define void @test90(i16* %ptr, i16 %cmp, i16 %val) { define void @test91(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test91: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: .LBB91_1: ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -1499,6 +1531,7 @@ define void @test91(i16* %ptr, i16 %cmp, i16 %val) { define void @test92(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test92: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: .LBB92_1: ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -1520,6 +1553,7 @@ define void @test92(i16* %ptr, i16 %cmp, i16 %val) { define void @test93(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test93: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: b .LBB93_2 ; PPC64LE-NEXT: .p2align 5 @@ -1540,6 +1574,7 @@ define void @test93(i16* %ptr, i16 %cmp, i16 %val) { define void @test94(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test94: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: b .LBB94_2 ; PPC64LE-NEXT: .p2align 5 @@ -1560,6 +1595,7 @@ define void @test94(i16* %ptr, i16 %cmp, i16 %val) { define void @test95(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test95: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB95_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -1582,6 +1618,7 @@ define void @test95(i16* %ptr, i16 %cmp, i16 %val) { define void @test96(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test96: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB96_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -1604,6 +1641,7 @@ define void @test96(i16* %ptr, i16 %cmp, i16 %val) { define 
void @test97(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test97: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB97_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -1626,6 +1664,7 @@ define void @test97(i16* %ptr, i16 %cmp, i16 %val) { define void @test98(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test98: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB98_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -1648,6 +1687,7 @@ define void @test98(i16* %ptr, i16 %cmp, i16 %val) { define void @test99(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test99: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB99_1: ; PPC64LE-NEXT: lharx 6, 0, 3 diff --git a/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/test/CodeGen/X86/avx512-shuffles/partial_permute.ll index 333efb04913d..1a483355319f 100644 --- a/test/CodeGen/X86/avx512-shuffles/partial_permute.ll +++ b/test/CodeGen/X86/avx512-shuffles/partial_permute.ll @@ -4780,3 +4780,42 @@ define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mem_mask1(<8 x doub ret <2 x double> %res } +; PR35977 +define void @test_zext_v8i8_to_v8i16(<8 x i8>* %arg, <8 x i16>* %arg1) { +; CHECK-LABEL: test_zext_v8i8_to_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; CHECK-NEXT: vpsllw $8, %xmm0, %xmm0 +; CHECK-NEXT: vmovdqa %xmm0, (%rsi) +; CHECK-NEXT: retq + %tmp = getelementptr <8 x i8>, <8 x i8>* %arg, i32 0 + %tmp2 = load <8 x i8>, <8 x i8>* %tmp + %tmp3 = extractelement <8 x i8> %tmp2, i32 0 + %tmp4 = zext i8 %tmp3 to i16 + %tmp5 = insertelement <8 x i16> undef, i16 %tmp4, i32 0 + %tmp6 = extractelement <8 x i8> %tmp2, i32 1 + %tmp7 = zext i8 %tmp6 to i16 + %tmp8 = insertelement <8 x i16> %tmp5, i16 %tmp7, i32 1 + %tmp9 = extractelement <8 x i8> %tmp2, i32 2 + %tmp10 = zext 
i8 %tmp9 to i16 + %tmp11 = insertelement <8 x i16> %tmp8, i16 %tmp10, i32 2 + %tmp12 = extractelement <8 x i8> %tmp2, i32 3 + %tmp13 = zext i8 %tmp12 to i16 + %tmp14 = insertelement <8 x i16> %tmp11, i16 %tmp13, i32 3 + %tmp15 = extractelement <8 x i8> %tmp2, i32 4 + %tmp16 = zext i8 %tmp15 to i16 + %tmp17 = insertelement <8 x i16> %tmp14, i16 %tmp16, i32 4 + %tmp18 = extractelement <8 x i8> %tmp2, i32 5 + %tmp19 = zext i8 %tmp18 to i16 + %tmp20 = insertelement <8 x i16> %tmp17, i16 %tmp19, i32 5 + %tmp21 = extractelement <8 x i8> %tmp2, i32 6 + %tmp22 = zext i8 %tmp21 to i16 + %tmp23 = insertelement <8 x i16> %tmp20, i16 %tmp22, i32 6 + %tmp24 = extractelement <8 x i8> %tmp2, i32 7 + %tmp25 = zext i8 %tmp24 to i16 + %tmp26 = insertelement <8 x i16> %tmp23, i16 %tmp25, i32 7 + %tmp27 = shl <8 x i16> %tmp26, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %tmp28 = getelementptr <8 x i16>, <8 x i16>* %arg1, i32 0 + store <8 x i16> %tmp27, <8 x i16>* %tmp28 + ret void +} diff --git a/test/CodeGen/X86/darwin-bzero.ll b/test/CodeGen/X86/darwin-bzero.ll index 410d67ff0ec1..3d03ec677a01 100644 --- a/test/CodeGen/X86/darwin-bzero.ll +++ b/test/CodeGen/X86/darwin-bzero.ll @@ -1,10 +1,13 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s +; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck -check-prefixes=CHECK,BZERO %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck -check-prefixes=CHECK,BZERO %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck -check-prefixes=CHECK,NOBZERO %s +; RUN: llc < %s -mtriple=x86_64-apple-ios10.0-simulator | FileCheck -check-prefixes=CHECK,NOBZERO %s declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind ; CHECK-LABEL: foo: -; CHECK: {{calll|callq}} ___bzero +; BZERO: {{calll|callq}} ___bzero +; NOBZERO-NOT: bzero define void @foo(i8* %p, i32 %len) { call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 %len, i32 1, i1 false) ret void diff --git
a/test/CodeGen/X86/inline-asm-A-constraint.ll b/test/CodeGen/X86/inline-asm-A-constraint.ll index 7975b318eff5..2ad011e88e0d 100644 --- a/test/CodeGen/X86/inline-asm-A-constraint.ll +++ b/test/CodeGen/X86/inline-asm-A-constraint.ll @@ -19,7 +19,8 @@ entry: %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1 ret { i64, i64 } %.fca.1.insert } -; CHECK: lock cmpxchg16b +; CHECK: lock +; CHECK-NEXT: cmpxchg16b attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } diff --git a/test/CodeGen/X86/pr35761.ll b/test/CodeGen/X86/pr35761.ll new file mode 100644 index 000000000000..0bf81bff841f --- /dev/null +++ b/test/CodeGen/X86/pr35761.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-linux %s -o - | FileCheck %s + +@x = global i8 0, align 1 +@y = global i32 0, align 4 +@z = global i24 0, align 4 + +define void @PR35761(i32 %call) { +; CHECK-LABEL: PR35761: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movzbl {{.*}}(%rip), %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: movzbl {{.*}}(%rip), %ecx +; CHECK-NEXT: xorl $255, %ecx +; CHECK-NEXT: orl %eax, %ecx +; CHECK-NEXT: movw %cx, {{.*}}(%rip) +; CHECK-NEXT: movb $0, z+{{.*}}(%rip) +; CHECK-NEXT: retq +entry: + %0 = load i8, i8* @x, align 1 + %tobool = trunc i8 %0 to i1 + %conv = zext i1 %tobool to i32 + %or = or i32 32767, %call + %neg = xor i32 %or, -1 + %neg1 = xor i32 %neg, -1 + %1 = load i32, i32* @y, align 4 + %xor = xor i32 %neg1, 
%1 + %or2 = or i32 %conv, %xor + %conv3 = trunc i32 %or2 to i8 + %bf.load = load i24, i24* @z, align 4 + %2 = zext i8 %conv3 to i24 + %bf.value = and i24 %2, 4194303 + store i24 %bf.value, i24* @z, align 2 + ret void +} + diff --git a/test/CodeGen/X86/pr35972.ll b/test/CodeGen/X86/pr35972.ll new file mode 100644 index 000000000000..09363fbc89bb --- /dev/null +++ b/test/CodeGen/X86/pr35972.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=i686-unknown-linux-gnu %s -o - -mattr=avx512bw | FileCheck %s + +define void @test3(i32 %c, <64 x i1>* %ptr) { +; CHECK-LABEL: test3: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: sbbl %ecx, %ecx +; CHECK-NEXT: kmovd %ecx, %k0 +; CHECK-NEXT: kunpckdq %k0, %k0, %k0 +; CHECK-NEXT: kmovq %k0, (%eax) +; CHECK-NEXT: retl + %cmp = icmp eq i32 %c, 0 + %insert = insertelement <64 x i1> undef, i1 %cmp, i32 0 + %shuf = shufflevector <64 x i1> %insert, <64 x i1> undef, <64 x i32> zeroinitializer + store <64 x i1> %shuf, <64 x i1>* %ptr + ret void +} + diff --git a/test/CodeGen/X86/pr37563.ll b/test/CodeGen/X86/pr37563.ll new file mode 100644 index 000000000000..934902d8e0d0 --- /dev/null +++ b/test/CodeGen/X86/pr37563.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s + +%struct.S = type <{ i16, i24, [5 x i8], i8, i16, [2 x i8] }> + +@z = global { i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] } { i16 -724, i8 94, i8 -18, i8 5, i8 undef, i8 96, i8 104, i8 -24, i8 10, i8 0, [5 x i8] undef }, align 8 +@tf_3_var_136 = global i64 0, align 8 +@.str = private unnamed_addr constant [6 x i8] c"%llu\0A\00", align 1 + +define void @PR35763() { +; CHECK-LABEL: PR35763: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movzwl {{.*}}(%rip), %eax +; CHECK-NEXT: movzwl z+{{.*}}(%rip), %ecx +; CHECK-NEXT: orl %eax, 
%ecx +; CHECK-NEXT: movq %rcx, {{.*}}(%rip) +; CHECK-NEXT: movl z+{{.*}}(%rip), %eax +; CHECK-NEXT: movzbl z+{{.*}}(%rip), %ecx +; CHECK-NEXT: shlq $32, %rcx +; CHECK-NEXT: orq %rax, %rcx +; CHECK-NEXT: movabsq $1090921758719, %rax # imm = 0xFE0000FFFF +; CHECK-NEXT: andq %rcx, %rax +; CHECK-NEXT: movl %eax, z+{{.*}}(%rip) +; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movb %al, z+{{.*}}(%rip) +; CHECK-NEXT: retq +entry: + %0 = load i16, i16* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 0), align 8 + %conv = sext i16 %0 to i32 + %bf.load = load i32, i32* bitcast (i24* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 1) to i32*), align 2 + %bf.clear = and i32 %bf.load, 2097151 + %bf.cast = zext i32 %bf.clear to i64 + %conv1 = trunc i64 %bf.cast to i32 + %or = or i32 %conv, %conv1 + %conv2 = trunc i32 %or to i16 + %conv3 = zext i16 %conv2 to i64 + store i64 %conv3, i64* @tf_3_var_136, align 8 + %bf.load4 = load i40, i40* bitcast ([5 x i8]* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 2) to i40*), align 2 + %bf.clear5 = and i40 %bf.load4, -8589869057 + store i40 %bf.clear5, i40* bitcast ([5 x i8]* getelementptr inbounds (%struct.S, %struct.S* bitcast ({ i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, [5 x i8] }* @z to %struct.S*), i32 0, i32 2) to i40*), align 2 + ret void +} diff --git a/test/CodeGen/X86/var-permute-128.ll b/test/CodeGen/X86/var-permute-128.ll index fb5f02e8d5d2..ba78cf7ee180 100644 --- a/test/CodeGen/X86/var-permute-128.ll +++ b/test/CodeGen/X86/var-permute-128.ll @@ -207,13 +207,12 @@ define <8 x i16> @var_shuffle_v8i16(<8 x i16> %v, <8 x i16> %indices) nounwind { define <16 x i8> @var_shuffle_v16i8(<16 x i8> %v, <16 x i8> %indices) nounwind { ; SSSE3-LABEL: var_shuffle_v16i8: ; 
SSSE3: # %bb.0: -; SSSE3-NEXT: pshufb %xmm0, %xmm1 -; SSSE3-NEXT: movdqa %xmm1, %xmm0 +; SSSE3-NEXT: pshufb %xmm1, %xmm0 ; SSSE3-NEXT: retq ; ; AVX-LABEL: var_shuffle_v16i8: ; AVX: # %bb.0: -; AVX-NEXT: vpshufb %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq %index0 = extractelement <16 x i8> %indices, i32 0 %index1 = extractelement <16 x i8> %indices, i32 1 diff --git a/test/CodeGen/X86/var-permute-256.ll b/test/CodeGen/X86/var-permute-256.ll index 3baab2476d40..b624fb087193 100644 --- a/test/CodeGen/X86/var-permute-256.ll +++ b/test/CodeGen/X86/var-permute-256.ll @@ -1277,3 +1277,183 @@ define <8 x float> @var_shuffle_v8f32(<8 x float> %v, <8 x i32> %indices) nounwi %ret7 = insertelement <8 x float> %ret6, float %v7, i32 7 ret <8 x float> %ret7 } + +define <8 x i32> @pr35820(<4 x i32> %v, <8 x i32> %indices) unnamed_addr nounwind { +; AVX1-LABEL: pr35820: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vpextrq $1, %xmm1, %r8 +; AVX1-NEXT: movq %r8, %r10 +; AVX1-NEXT: shrq $30, %r10 +; AVX1-NEXT: vmovq %xmm1, %r9 +; AVX1-NEXT: movq %r9, %rsi +; AVX1-NEXT: shrq $30, %rsi +; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; AVX1-NEXT: andl $3, %r9d +; AVX1-NEXT: andl $12, %esi +; AVX1-NEXT: andl $3, %r8d +; AVX1-NEXT: andl $12, %r10d +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0 +; AVX1-NEXT: vpextrq $1, %xmm0, %rax +; AVX1-NEXT: movq %rax, %rdi +; AVX1-NEXT: shrq $30, %rdi +; AVX1-NEXT: vmovq %xmm0, %rcx +; AVX1-NEXT: movq %rcx, %rdx +; AVX1-NEXT: shrq $30, %rdx +; AVX1-NEXT: andl $3, %ecx +; AVX1-NEXT: andl $12, %edx +; AVX1-NEXT: andl $3, %eax +; AVX1-NEXT: andl $12, %edi +; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX1-NEXT: vpinsrd $1, -24(%rsp,%rdx), %xmm0, %xmm0 +; AVX1-NEXT: vpinsrd $2, -24(%rsp,%rax,4), %xmm0, %xmm0 +; AVX1-NEXT: vpinsrd $3, -24(%rsp,%rdi), %xmm0, %xmm0 +; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX1-NEXT: vpinsrd $1, -24(%rsp,%rsi), %xmm1, %xmm1 +; AVX1-NEXT: vpinsrd $2, -24(%rsp,%r8,4), 
%xmm1, %xmm1 +; AVX1-NEXT: vpinsrd $3, -24(%rsp,%r10), %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; INT256-LABEL: pr35820: +; INT256: # %bb.0: # %entry +; INT256-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 +; INT256-NEXT: vpermps %ymm0, %ymm1, %ymm0 +; INT256-NEXT: retq +entry: + %tmp1 = extractelement <8 x i32> %indices, i32 0 + %vecext2.8 = extractelement <4 x i32> %v, i32 %tmp1 + %tmp2 = extractelement <8 x i32> %indices, i32 1 + %vecext2.9 = extractelement <4 x i32> %v, i32 %tmp2 + %tmp3 = extractelement <8 x i32> %indices, i32 2 + %vecext2.10 = extractelement <4 x i32> %v, i32 %tmp3 + %tmp4 = extractelement <8 x i32> %indices, i32 3 + %vecext2.11 = extractelement <4 x i32> %v, i32 %tmp4 + %tmp5 = extractelement <8 x i32> %indices, i32 4 + %vecext2.12 = extractelement <4 x i32> %v, i32 %tmp5 + %tmp6 = extractelement <8 x i32> %indices, i32 5 + %vecext2.13 = extractelement <4 x i32> %v, i32 %tmp6 + %tmp7 = extractelement <8 x i32> %indices, i32 6 + %vecext2.14 = extractelement <4 x i32> %v, i32 %tmp7 + %tmp8 = extractelement <8 x i32> %indices, i32 7 + %vecext2.15 = extractelement <4 x i32> %v, i32 %tmp8 + %tmp9 = insertelement <8 x i32> undef, i32 %vecext2.8, i32 0 + %tmp10 = insertelement <8 x i32> %tmp9, i32 %vecext2.9, i32 1 + %tmp11 = insertelement <8 x i32> %tmp10, i32 %vecext2.10, i32 2 + %tmp12 = insertelement <8 x i32> %tmp11, i32 %vecext2.11, i32 3 + %tmp13 = insertelement <8 x i32> %tmp12, i32 %vecext2.12, i32 4 + %tmp14 = insertelement <8 x i32> %tmp13, i32 %vecext2.13, i32 5 + %tmp15 = insertelement <8 x i32> %tmp14, i32 %vecext2.14, i32 6 + %tmp16 = insertelement <8 x i32> %tmp15, i32 %vecext2.15, i32 7 + ret <8 x i32> %tmp16 +} + +define <8 x float> @pr35820_float(<4 x float> %v, <8 x i32> %indices) unnamed_addr nounwind { +; AVX1-LABEL: pr35820_float: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vpextrq $1, %xmm1, %r8 +; AVX1-NEXT: movq %r8, %r10 +; AVX1-NEXT: shrq $30, %r10 +; AVX1-NEXT: vmovq %xmm1, %r9 
+; AVX1-NEXT: movq %r9, %rdx +; AVX1-NEXT: shrq $30, %rdx +; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; AVX1-NEXT: andl $3, %r9d +; AVX1-NEXT: andl $12, %edx +; AVX1-NEXT: andl $3, %r8d +; AVX1-NEXT: andl $12, %r10d +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0 +; AVX1-NEXT: vpextrq $1, %xmm0, %rax +; AVX1-NEXT: movq %rax, %rdi +; AVX1-NEXT: shrq $30, %rdi +; AVX1-NEXT: vmovq %xmm0, %rcx +; AVX1-NEXT: movq %rcx, %rsi +; AVX1-NEXT: shrq $30, %rsi +; AVX1-NEXT: andl $3, %ecx +; AVX1-NEXT: andl $12, %esi +; AVX1-NEXT: andl $3, %eax +; AVX1-NEXT: andl $12, %edi +; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] +; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] +; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] +; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] +; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] +; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; INT256-LABEL: pr35820_float: +; INT256: # %bb.0: # %entry +; INT256-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 +; INT256-NEXT: vpermps %ymm0, %ymm1, %ymm0 +; INT256-NEXT: retq +entry: + %tmp1 = extractelement <8 x i32> %indices, i32 0 + %vecext2.8 = extractelement <4 x float> %v, i32 %tmp1 + %tmp2 = extractelement <8 x i32> %indices, i32 1 + %vecext2.9 = extractelement <4 x float> %v, i32 %tmp2 + %tmp3 = extractelement <8 x i32> %indices, i32 2 + %vecext2.10 = extractelement <4 x float> %v, i32 %tmp3 + %tmp4 = extractelement <8 x i32> %indices, i32 3 + %vecext2.11 = extractelement <4 x float> %v, i32 %tmp4 + %tmp5 = extractelement <8 x i32> %indices, i32 4 + %vecext2.12 = extractelement <4 x float> %v, i32 %tmp5 + %tmp6 = extractelement <8 x i32> %indices, i32 5 + %vecext2.13 = extractelement <4 x float> %v, i32 %tmp6 + %tmp7 = extractelement <8 
x i32> %indices, i32 6 + %vecext2.14 = extractelement <4 x float> %v, i32 %tmp7 + %tmp8 = extractelement <8 x i32> %indices, i32 7 + %vecext2.15 = extractelement <4 x float> %v, i32 %tmp8 + %tmp9 = insertelement <8 x float> undef, float %vecext2.8, i32 0 + %tmp10 = insertelement <8 x float> %tmp9, float %vecext2.9, i32 1 + %tmp11 = insertelement <8 x float> %tmp10, float %vecext2.10, i32 2 + %tmp12 = insertelement <8 x float> %tmp11, float %vecext2.11, i32 3 + %tmp13 = insertelement <8 x float> %tmp12, float %vecext2.12, i32 4 + %tmp14 = insertelement <8 x float> %tmp13, float %vecext2.13, i32 5 + %tmp15 = insertelement <8 x float> %tmp14, float %vecext2.14, i32 6 + %tmp16 = insertelement <8 x float> %tmp15, float %vecext2.15, i32 7 + ret <8 x float> %tmp16 +} + +define <4 x i32> @big_source(<8 x i32> %v, <4 x i32> %indices) unnamed_addr nounwind { +; AVX-LABEL: big_source: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rbp +; AVX-NEXT: movq %rsp, %rbp +; AVX-NEXT: andq $-32, %rsp +; AVX-NEXT: subq $64, %rsp +; AVX-NEXT: vmovq %xmm1, %rax +; AVX-NEXT: movq %rax, %rcx +; AVX-NEXT: shrq $30, %rcx +; AVX-NEXT: andl $28, %ecx +; AVX-NEXT: vpextrq $1, %xmm1, %rdx +; AVX-NEXT: movq %rdx, %rsi +; AVX-NEXT: sarq $32, %rsi +; AVX-NEXT: andl $7, %eax +; AVX-NEXT: andl $7, %edx +; AVX-NEXT: vmovaps %ymm0, (%rsp) +; AVX-NEXT: andl $7, %esi +; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: vpinsrd $1, (%rsp,%rcx), %xmm0, %xmm0 +; AVX-NEXT: vpinsrd $2, (%rsp,%rdx,4), %xmm0, %xmm0 +; AVX-NEXT: vpinsrd $3, (%rsp,%rsi,4), %xmm0, %xmm0 +; AVX-NEXT: movq %rbp, %rsp +; AVX-NEXT: popq %rbp +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq +entry: + %tmp1 = extractelement <4 x i32> %indices, i32 0 + %vecext2.8 = extractelement <8 x i32> %v, i32 %tmp1 + %tmp2 = extractelement <4 x i32> %indices, i32 1 + %vecext2.9 = extractelement <8 x i32> %v, i32 %tmp2 + %tmp3 = extractelement <4 x i32> %indices, i32 2 + %vecext2.10 = extractelement <8 x i32> %v, i32 %tmp3 + %tmp4 = 
extractelement <4 x i32> %indices, i32 3 + %vecext2.11 = extractelement <8 x i32> %v, i32 %tmp4 + %tmp9 = insertelement <4 x i32> undef, i32 %vecext2.8, i32 0 + %tmp10 = insertelement <4 x i32> %tmp9, i32 %vecext2.9, i32 1 + %tmp11 = insertelement <4 x i32> %tmp10, i32 %vecext2.10, i32 2 + %tmp12 = insertelement <4 x i32> %tmp11, i32 %vecext2.11, i32 3 + ret <4 x i32> %tmp12 +} diff --git a/test/MC/COFF/cv-inline-linetable.s b/test/MC/COFF/cv-inline-linetable.s index 61a42d92f405..c5e28c4d0785 100644 --- a/test/MC/COFF/cv-inline-linetable.s +++ b/test/MC/COFF/cv-inline-linetable.s @@ -135,3 +135,29 @@ Ltmp1: .cv_filechecksums # File index to string table offset subsection .cv_stringtable # String table +# CHECK-LABEL: FunctionLineTable [ +# CHECK: LinkageName: ?baz@@YAXXZ +# CHECK: Flags: 0x1 +# CHECK: CodeSize: 0x3D +# CHECK: FilenameSegment [ +# CHECK: Filename: D:\src\llvm\build\t.cpp (0x0) +# CHECK: +0x0 [ +# CHECK: LineNumberStart: 13 +# CHECK: ] +# CHECK: +0x1 [ +# CHECK: LineNumberStart: 14 +# CHECK: ] +# CHECK: +0x8 [ +# CHECK: LineNumberStart: 15 +# CHECK: ] +# There shouldn't be any other line number entries because all the other +# .cv_locs are on line 15 where the top-level inline call site is. 
+# CHECK-NOT: LineNumberStart +# CHECK: +0x34 [ +# CHECK: LineNumberStart: 16 +# CHECK: ] +# CHECK: +0x3B [ +# CHECK: LineNumberStart: 17 +# CHECK: ] +# CHECK: ] +# CHECK: ] diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s index 23846d921a8c..378af768fa99 100644 --- a/test/MC/X86/x86-64.s +++ b/test/MC/X86/x86-64.s @@ -99,7 +99,8 @@ // CHECK: shll $2, %eax sall $2, %eax -// CHECK: rep movsb +// CHECK: rep +// CHECK-NEXT: movsb rep # comment movsb @@ -1557,3 +1558,38 @@ ptwriteq 0xdeadbeef(%rbx,%rcx,8) // CHECK: ptwriteq %rax // CHECK: encoding: [0xf3,0x48,0x0f,0xae,0xe0] ptwriteq %rax + +// __asm __volatile( +// "pushf \n\t" +// "popf \n\t" +// "rep \n\t" +// ".byte 0x0f, 0xa7, 0xd0" +// ); +// CHECK: pushfq +// CHECK-NEXT: popfq +// CHECK-NEXT: rep +// CHECK-NEXT: .byte 15 +// CHECK-NEXT: .byte 167 +// CHECK-NEXT: .byte 208 +pushfq +popfq +rep +.byte 15 +.byte 167 +.byte 208 + +// CHECK: lock +// CHECK: cmpxchgl + cmp $0, %edx + je 1f + lock +1: cmpxchgl %ecx,(%rdi) + +// CHECK: rep +// CHECK-NEXT: byte +rep +.byte 0xa4 # movsb + +// CHECK: lock +// This line has to be the last one in the file +lock diff --git a/test/ThinLTO/X86/Inputs/dicompositetype-unique2.ll b/test/ThinLTO/X86/Inputs/dicompositetype-unique2.ll new file mode 100644 index 000000000000..9a9ee7223c90 --- /dev/null +++ b/test/ThinLTO/X86/Inputs/dicompositetype-unique2.ll @@ -0,0 +1,46 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-scei-ps4" + +%struct.CFVS = type { %struct.Vec } +%struct.Vec = type { i8 } +%struct.S = type { i8 } + +define void @_ZN4CFVSD2Ev(%struct.CFVS* %this) unnamed_addr align 2 !dbg !8 { +entry: + %this.addr = alloca %struct.CFVS*, align 8 + store %struct.CFVS* %this, %struct.CFVS** %this.addr, align 8 + %this1 = load %struct.CFVS*, %struct.CFVS** %this.addr, align 8 + %m_val = getelementptr inbounds %struct.CFVS, %struct.CFVS* %this1, i32 0, i32 0 + ret void +} + +declare dereferenceable(1) %struct.S* @_Z3Getv() + 
+!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 6.0.0 (trunk 321360) (llvm/trunk 321359)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "bz188598-b.cpp", directory: "") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 2} +!6 = !{i32 7, !"PIC Level", i32 2} +!8 = distinct !DISubprogram(name: "~CFVS", linkageName: "_ZN4CFVSD2Ev", scope: !9, file: !1, line: 2, type: !28, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0, declaration: !27, variables: !2) +!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "CFVS", file: !10, line: 7, size: 8, elements: !11, identifier: "_ZTS4CFVS") +!10 = !DIFile(filename: "./bz188598.h", directory: "") +!11 = !{!12, !27} +!12 = !DIDerivedType(tag: DW_TAG_member, name: "m_val", scope: !9, file: !10, line: 9, baseType: !13, size: 8) +!13 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Vec<&Get>", file: !10, line: 4, size: 8, elements: !14, templateParams: !19, identifier: "_ZTS3VecIXadL_Z3GetvEEE") +!14 = !{!35} +!19 = !{!20} +!20 = !DITemplateValueParameter(name: "F", type: !21, value: %struct.S* ()* @_Z3Getv) +!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !22, size: 64) +!22 = !DIDerivedType(tag: DW_TAG_typedef, name: "Func", file: !10, line: 2, baseType: !23) +!23 = !DISubroutineType(types: !24) +!24 = !{!35} +!27 = !DISubprogram(name: "~CFVS", scope: !9, file: !10, line: 8, type: !28, isLocal: false, isDefinition: false, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false) +!28 = !DISubroutineType(types: !29) +!29 = !{null, !30} +!30 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer) +!35 = !DIBasicType(name: "int", size: 32, 
encoding: DW_ATE_signed) diff --git a/test/ThinLTO/X86/dicompositetype-unique2.ll b/test/ThinLTO/X86/dicompositetype-unique2.ll new file mode 100644 index 000000000000..924579569270 --- /dev/null +++ b/test/ThinLTO/X86/dicompositetype-unique2.ll @@ -0,0 +1,69 @@ +; RUN: opt -module-summary -o %t1.bc %s +; RUN: opt -module-summary -o %t2.bc %S/Inputs/dicompositetype-unique2.ll +; RUN: llvm-lto --thinlto-action=run %t1.bc %t2.bc -thinlto-save-temps=%t3. +; RUN: llvm-dis %t3.0.3.imported.bc -o - | FileCheck %s +; RUN: llvm-lto2 run %t1.bc %t2.bc -o %t --save-temps \ +; RUN: -r %t1.bc,_ZN1CD2Ev,pl \ +; RUN: -r %t1.bc,_ZN4CFVSD2Ev,l \ +; RUN: -r %t1.bc,_Z3Getv,l \ +; RUN: -r %t2.bc,_ZN4CFVSD2Ev,pl \ +; RUN: -r %t2.bc,_Z3Getv,l +; RUN: llvm-dis %t.1.3.import.bc -o - | FileCheck %s + +; Only llvm-lto2 adds the dso_local keyword, hence the {{.*}} +; CHECK: define available_externally{{.*}} void @_ZN4CFVSD2Ev + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-scei-ps4" + +%class.C = type <{ i32 (...)**, %class.A, %struct.CFVS, [6 x i8] }> +%class.A = type { %struct.Vec } +%struct.Vec = type { i8 } +%struct.CFVS = type { %struct.Vec } +%struct.S = type { i8 } + +define void @_ZN1CD2Ev(%class.C* %this) unnamed_addr align 2 !dbg !8 { +entry: + %this.addr = alloca %class.C*, align 8 + %this1 = load %class.C*, %class.C** %this.addr, align 8 + %m = getelementptr inbounds %class.C, %class.C* %this1, i32 0, i32 2 + call void @_ZN4CFVSD2Ev(%struct.CFVS* %m), !dbg !50 + ret void +} + +declare void @_ZN4CFVSD2Ev(%struct.CFVS*) unnamed_addr + +declare dereferenceable(1) %struct.S* @_Z3Getv() + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 6.0.0 (trunk 321360) (llvm/trunk 321359)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "bz188598-a.cpp", directory: ".") +!2 = !{} +!3 = !{i32 2, 
!"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 2} +!6 = !{i32 7, !"PIC Level", i32 2} +!8 = distinct !DISubprogram(name: "~C", linkageName: "_ZN1CD2Ev", scope: !9, file: !1, line: 9, type: !47, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0, declaration: !46, variables: !2) +!9 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "C", file: !1, line: 5, size: 128, elements: !10, vtableHolder: !9, identifier: "_ZTS1C") +!10 = !{!38, !46} +!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Vec<&Get>", file: !16, line: 4, size: 8, elements: !17, templateParams: !22, identifier: "_ZTS3VecIXadL_Z3GetvEEE") +!16 = !DIFile(filename: "./bz188598.h", directory: ".") +!17 = !{!55} +!22 = !{!23} +!23 = !DITemplateValueParameter(name: "F", type: !24, value: %struct.S* ()* @_Z3Getv) +!24 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !25, size: 64) +!25 = !DIDerivedType(tag: DW_TAG_typedef, name: "Func", file: !16, line: 2, baseType: !26) +!26 = !DISubroutineType(types: !27) +!27 = !{!55} +!38 = !DIDerivedType(tag: DW_TAG_member, name: "m", scope: !9, file: !1, line: 7, baseType: !39, size: 8, offset: 72) +!39 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "CFVS", file: !16, line: 7, size: 8, elements: !40, identifier: "_ZTS4CFVS") +!40 = !{!41} +!41 = !DIDerivedType(tag: DW_TAG_member, name: "m_val", scope: !39, file: !16, line: 9, baseType: !15, size: 8) +!46 = !DISubprogram(name: "~C", scope: !9, file: !1, line: 6, type: !47, isLocal: false, isDefinition: false, scopeLine: 6, containingType: !9, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 0, flags: DIFlagPrototyped, isOptimized: false) +!47 = !DISubroutineType(types: !48) +!48 = !{!55} +!50 = !DILocation(line: 9, scope: !51) +!51 = distinct !DILexicalBlock(scope: !8, file: !1, line: 9) +!55 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) diff --git 
a/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll b/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll new file mode 100644 index 000000000000..b153a8b1e53f --- /dev/null +++ b/test/Transforms/CodeGenPrepare/X86/sink-addrmode-select.ll @@ -0,0 +1,19 @@ +; RUN: opt -S -codegenprepare -disable-complex-addr-modes=false -addr-sink-new-select=true %s | FileCheck %s --check-prefix=CHECK +target datalayout = +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +; Select when both offset and scale reg are present. +define i64 @test1(i1 %c, i64* %b, i64 %scale) { +; CHECK-LABEL: @test1 +entry: +; CHECK-LABEL: entry: + %g = getelementptr inbounds i64, i64* %b, i64 %scale + %g1 = getelementptr inbounds i64, i64* %g, i64 8 + %g2 = getelementptr inbounds i64, i64* %g, i64 16 + %s = select i1 %c, i64* %g1, i64* %g2 +; CHECK-NOT: sunkaddr + %v = load i64 , i64* %s, align 8 + ret i64 %v +} + diff --git a/test/Transforms/GVNHoist/pr35222-hoist-load.ll b/test/Transforms/GVNHoist/pr35222-hoist-load.ll index 7e9c62006162..b9b1a870a59b 100644 --- a/test/Transforms/GVNHoist/pr35222-hoist-load.ll +++ b/test/Transforms/GVNHoist/pr35222-hoist-load.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -gvn-hoist < %s | FileCheck %s +; CHECK-LABEL: build_tree ; CHECK: load ; CHECK: load ; Check that the load is not hoisted because the call can potentially @@ -23,3 +24,47 @@ do.end: ; preds = %do.body } declare i1 @pqdownheap(i32) + +@i = external hidden unnamed_addr global i32, align 4 +@j = external hidden unnamed_addr global [573 x i32], align 4 +@v = external global i1 + +; CHECK-LABEL: test +; CHECK-LABEL: do.end +; CHECK: load +; Check that the load is not hoisted because the call can potentially +; modify the global + +define i32 @test() { +entry: + br label %for.cond + +for.cond: + %a3 = load volatile i1, i1* @v + br i1 %a3, label %for.body, label %while.end + 
+for.body: + br label %if.then + +if.then: + %tmp4 = load i32, i32* @i, align 4 + br label %for.cond + +while.end: + br label %do.body + +do.body: + %tmp9 = load i32, i32* getelementptr inbounds ([573 x i32], [573 x i32]* @j, +i32 0, i32 1), align 4 + %tmp10 = load i32, i32* @i, align 4 + call void @fn() + %a1 = load volatile i1, i1* @v + br i1 %a1, label %do.body, label %do.end + +do.end: + %tmp20 = load i32, i32* getelementptr inbounds ([573 x i32], [573 x i32]* @j, +i32 0, i32 1), align 4 + ret i32 %tmp20 +} + +declare void @fn() diff --git a/test/Transforms/JumpThreading/ddt-crash3.ll b/test/Transforms/JumpThreading/ddt-crash3.ll new file mode 100644 index 000000000000..50ac86a3fb5b --- /dev/null +++ b/test/Transforms/JumpThreading/ddt-crash3.ll @@ -0,0 +1,43 @@ +; RUN: opt < %s -jump-threading -disable-output -verify-dom-info + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@global = external local_unnamed_addr global i64, align 8 +@global.1 = external local_unnamed_addr global i64, align 8 +@global.2 = external local_unnamed_addr global i64, align 8 + +; Function Attrs: norecurse noreturn nounwind uwtable +define void @hoge() local_unnamed_addr #0 { +bb: + br label %bb1 + +bb1: ; preds = %bb26, %bb + %tmp = load i64, i64* @global, align 8, !tbaa !1 + %tmp2 = icmp eq i64 %tmp, 0 + br i1 %tmp2, label %bb27, label %bb3 + +bb3: ; preds = %bb1 + %tmp4 = load i64, i64* @global.1, align 8, !tbaa !1 + %tmp5 = icmp eq i64 %tmp4, 0 + br i1 %tmp5, label %bb23, label %bb23 + +bb23: ; preds = %bb3, %bb3 + br label %bb26 + +bb26: ; preds = %bb27, %bb23 + br label %bb1 + +bb27: ; preds = %bb1 + br label %bb26 +} + +attributes #0 = { norecurse noreturn nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" 
"no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 7.0.0 "} +!1 = !{!2, !2, i64 0} +!2 = !{!"long", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"} diff --git a/test/Transforms/JumpThreading/ddt-crash4.ll b/test/Transforms/JumpThreading/ddt-crash4.ll new file mode 100644 index 000000000000..9bf08395d660 --- /dev/null +++ b/test/Transforms/JumpThreading/ddt-crash4.ll @@ -0,0 +1,75 @@ +; RUN: opt < %s -jump-threading -disable-output -verify-dom-info +@global = external global i64, align 8 + +define void @f() { +bb: + br label %bb1 + +bb1: + %tmp = load i64, i64* @global, align 8 + %tmp2 = icmp eq i64 %tmp, 0 + br i1 %tmp2, label %bb27, label %bb3 + +bb3: + %tmp4 = load i64, i64* @global, align 8 + %tmp5 = icmp eq i64 %tmp4, 0 + br i1 %tmp5, label %bb6, label %bb7 + +bb6: + br label %bb7 + +bb7: + %tmp8 = phi i1 [ true, %bb3 ], [ undef, %bb6 ] + %tmp9 = select i1 %tmp8, i64 %tmp4, i64 0 + br i1 false, label %bb10, label %bb23 + +bb10: + %tmp11 = load i64, i64* @global, align 8 + %tmp12 = icmp slt i64 %tmp11, 5 + br i1 %tmp12, label %bb13, label %bb17 + +bb13: + br label %bb14 + +bb14: + br i1 undef, label %bb15, label %bb16 + +bb15: + unreachable + +bb16: + br label %bb10 + +bb17: + br label %bb18 + +bb18: + br i1 undef, label %bb22, label %bb13 + +bb19: + br i1 undef, label %bb20, label %bb21 + +bb20: + unreachable + +bb21: + br label %bb18 + +bb22: + br label %bb23 + +bb23: + br i1 undef, label %bb24, label %bb13 + +bb24: + br i1 undef, label %bb26, label %bb25 + +bb25: + br label %bb19 + +bb26: + br label %bb1 + +bb27: + br label %bb24 +} diff --git a/test/Transforms/LoopVectorize/pr35773.ll b/test/Transforms/LoopVectorize/pr35773.ll new file mode 100644 index 000000000000..362ece70b898 --- /dev/null +++ b/test/Transforms/LoopVectorize/pr35773.ll @@ -0,0 
+1,53 @@ +; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +@a = common local_unnamed_addr global i32 0, align 4 +@b = common local_unnamed_addr global i8 0, align 1 + +; Function Attrs: norecurse nounwind uwtable +define void @doit1() local_unnamed_addr{ +entry: + br label %for.body + +for.body: + %main.iv = phi i32 [ 0, %entry ], [ %inc, %for.body ] + + %i8.iv = phi i8 [ 0, %entry ], [ %i8.add, %for.body ] + %i32.iv = phi i32 [ 0, %entry ], [ %i32.add, %for.body ] + + %trunc.to.be.converted.to.new.iv = trunc i32 %i32.iv to i8 + %i8.add = add i8 %i8.iv, %trunc.to.be.converted.to.new.iv + + %noop.conv.under.pse = and i32 %i32.iv, 255 + %i32.add = add nuw nsw i32 %noop.conv.under.pse, 9 + + %inc = add i32 %main.iv, 1 + %tobool = icmp eq i32 %inc, 16 + br i1 %tobool, label %for.cond.for.end_crit_edge, label %for.body + +; CHECK-LABEL: @doit1( +; CHECK: vector.body: +; CHECK-NEXT: [[MAIN_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[MAIN_IV_NEXT:%.*]], [[VECTOR_BODY:%.*]] ] +; CHECK-NEXT: [[I8_IV:%.*]] = phi <4 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[I8_IV_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[I32_IV:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[I32_IV_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[IV_FROM_TRUNC:%.*]] = phi <4 x i8> [ , [[VECTOR_PH]] ], [ [[IV_FROM_TRUNC_NEXT:%.*]], [[VECTOR_BODY]] ] + +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[MAIN_IV]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[MAIN_IV]], 0 + +; CHECK-NEXT: [[I8_IV_NEXT]] = add <4 x i8> [[I8_IV]], [[IV_FROM_TRUNC]] + +; CHECK-NEXT: [[MAIN_IV_NEXT]] = add i32 [[MAIN_IV]], 4 +; CHECK-NEXT: [[I32_IV_NEXT]] = add <4 x i32> 
[[I32_IV]], +; CHECK-NEXT: [[IV_FROM_TRUNC_NEXT]] = add <4 x i8> [[IV_FROM_TRUNC]], +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[MAIN_IV_NEXT]], 16 +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 + +for.cond.for.end_crit_edge: + store i8 %i8.add, i8* @b, align 1 + br label %for.end + +for.end: + ret void +} diff --git a/test/Transforms/SLPVectorizer/X86/PR35628_1.ll b/test/Transforms/SLPVectorizer/X86/PR35628_1.ll new file mode 100644 index 000000000000..a573fc911eef --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/PR35628_1.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" + +define void @mainTest(i32* %ptr) #0 { +; CHECK-LABEL: @mainTest( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[PTR:%.*]], null +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP:%.*]], label [[BAIL_OUT:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA5:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 3 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[PTR]] to <4 x i32>* +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[TMP4]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = add i32 1, undef +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], undef +; CHECK-NEXT: [[TMP11:%.*]] = add i32 
[[TMP10]], undef +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], undef +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], undef +; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP13]], undef +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP8]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP16]], 1 +; CHECK-NEXT: [[OP_EXTRA3:%.*]] = add i32 [[OP_EXTRA]], [[TMP7]] +; CHECK-NEXT: [[OP_EXTRA4:%.*]] = add i32 [[OP_EXTRA3]], [[TMP6]] +; CHECK-NEXT: [[OP_EXTRA5]] = add i32 [[OP_EXTRA4]], [[TMP5]] +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP15]], undef +; CHECK-NEXT: br label [[LOOP]] +; CHECK: bail_out: +; CHECK-NEXT: ret void +; +entry: + %cmp = icmp eq i32* %ptr, null + br i1 %cmp, label %loop, label %bail_out + +loop: + %dummy_phi = phi i32 [ 1, %entry ], [ %18, %loop ] + %0 = load i32, i32 * %ptr , align 4 + %1 = mul i32 %0, %0 + %2 = add i32 1, %1 + %3 = getelementptr inbounds i32, i32 * %ptr, i64 1 + %4 = load i32, i32 * %3 , align 4 + %5 = mul i32 %4, %4 + %6 = add i32 %2, %4 + %7 = add i32 %6, %5 + %8 = getelementptr inbounds i32, i32 *%ptr, i64 2 + %9 = load i32, i32 * %8 , align 4 + %10 = mul i32 %9, %9 + %11 = add i32 %7, %9 + %12 = add i32 %11, %10 + %13 = sext i32 %9 to i64 + %14 = getelementptr inbounds i32, i32 *%ptr, i64 3 + %15 = load i32, i32 * %14 , align 4 + %16 = mul i32 %15, %15 + %17 = add i32 %12, %15 + %18 = add i32 %17, %16 + br label %loop + +bail_out: + ret void +} + +attributes #0 = { "target-cpu"="westmere" } + diff --git a/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/test/Transforms/SLPVectorizer/X86/PR35628_2.ll new file mode 100644 
index 000000000000..52a6d73db981 --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/PR35628_2.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" + +define void @test() #0 { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP6:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[DUMMY_ADD:%.*]] = add i16 0, 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> undef, i64 [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> , [[TMP4]] +; CHECK-NEXT: [[TMP6]] = extractelement <4 x i64> [[TMP5]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP7]], 32 +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> , [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = ashr exact <4 x i64> [[TMP8]], +; CHECK-NEXT: [[SUM1:%.*]] = add i64 undef, undef +; CHECK-NEXT: [[SUM2:%.*]] = add i64 [[SUM1]], undef +; CHECK-NEXT: [[ZSUM:%.*]] = add i64 [[SUM2]], 0 +; CHECK-NEXT: [[JOIN:%.*]] = add i64 undef, [[ZSUM]] +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> undef, <4 x i32> +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i64> [[TMP9]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32> +; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i64> [[BIN_RDX]], 
[[RDX_SHUF1]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i64 [[TMP10]], 0 +; CHECK-NEXT: [[OP_EXTRA3]] = add i64 [[OP_EXTRA]], [[TMP6]] +; CHECK-NEXT: [[LAST:%.*]] = add i64 [[JOIN]], undef +; CHECK-NEXT: br label [[LOOP]] +; +entry: + br label %loop + +loop: + %dummy_phi = phi i64 [ 1, %entry ], [ %last, %loop ] + %0 = phi i64 [ 2, %entry ], [ %fork, %loop ] + %inc1 = add i64 %0, 1 + %inc2 = add i64 %0, 2 + %inc11 = add i64 1, %inc1 + %exact1 = ashr exact i64 %inc11, 32 + %inc3 = add i64 %0, 3 + %dummy_add = add i16 0, 0 + %inc12 = add i64 1, %inc2 + %exact2 = ashr exact i64 %inc12, 32 + %dummy_shl = shl i64 %inc3, 32 + %inc13 = add i64 1, %inc3 + %exact3 = ashr exact i64 %inc13, 32 + %fork = add i64 %0, 0 + %sum1 = add i64 %exact3, %exact2 + %sum2 = add i64 %sum1, %exact1 + %zsum = add i64 %sum2, 0 + %sext22 = add i64 1, %fork + %exact4 = ashr exact i64 %sext22, 32 + %join = add i64 %fork, %zsum + %last = add i64 %join, %exact4 + br label %loop +} + diff --git a/test/Transforms/SLPVectorizer/X86/PR35777.ll b/test/Transforms/SLPVectorizer/X86/PR35777.ll new file mode 100644 index 000000000000..f3983d716d08 --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/PR35777.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -verify -slp-vectorizer -o - -S -mtriple=x86_64-apple-macosx10.13.0 | FileCheck %s + +@global = local_unnamed_addr global [6 x double] zeroinitializer, align 16 + +define { i64, i64 } @patatino(double %arg) { +; CHECK-LABEL: @patatino( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* bitcast ([6 x double]* @global to <2 x double>*), align 16 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2) to <2 x double>*), align 16 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, 
double [[ARG:%.*]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[ARG]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP0]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4) to <2 x double>*), align 16 +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = fptosi <2 x double> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = trunc <2 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP12]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 +; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i64, i64 } [[TMP16]], i64 [[TMP14]], 1 +; CHECK-NEXT: ret { i64, i64 } [[TMP17]] +; +bb: + %tmp = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 0), align 16 + %tmp1 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2), align 16 + %tmp2 = fmul double %tmp1, %arg + %tmp3 = fadd double %tmp, %tmp2 + %tmp4 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4), align 16 + %tmp5 = fadd double %tmp4, %tmp3 + %tmp6 = fptosi double %tmp5 to i32 + %tmp7 = sext i32 %tmp6 to i64 + %tmp8 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 1), align 8 + %tmp9 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 3), align 8 + %tmp10 = fmul double %tmp9, %arg + %tmp11 = fadd double %tmp8, 
%tmp10 + %tmp12 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 5), align 8 + %tmp13 = fadd double %tmp12, %tmp11 + %tmp14 = fptosi double %tmp13 to i32 + %tmp15 = sext i32 %tmp14 to i64 + %tmp16 = insertvalue { i64, i64 } undef, i64 %tmp7, 0 + %tmp17 = insertvalue { i64, i64 } %tmp16, i64 %tmp15, 1 + ret { i64, i64 } %tmp17 +} diff --git a/test/Transforms/SLPVectorizer/X86/PR35865.ll b/test/Transforms/SLPVectorizer/X86/PR35865.ll new file mode 100644 index 000000000000..b022dd7d9155 --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/PR35865.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -slp-vectorizer < %s -S -o - -mtriple=x86_64-apple-macosx10.10.0 -mcpu=core2 | FileCheck %s + +define void @_Z10fooConvertPDv4_xS0_S0_PKS_() { +; CHECK-LABEL: @_Z10fooConvertPDv4_xS0_S0_PKS_( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x half> undef, i32 4 +; CHECK-NEXT: [[CONV_I_4_I:%.*]] = fpext half [[TMP0]] to float +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[CONV_I_4_I]] to i32 +; CHECK-NEXT: [[VECINS_I_4_I:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x half> undef, i32 5 +; CHECK-NEXT: [[CONV_I_5_I:%.*]] = fpext half [[TMP2]] to float +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[CONV_I_5_I]] to i32 +; CHECK-NEXT: [[VECINS_I_5_I:%.*]] = insertelement <8 x i32> [[VECINS_I_4_I]], i32 [[TMP3]], i32 5 +; CHECK-NEXT: ret void +; +entry: + %0 = extractelement <16 x half> undef, i32 4 + %conv.i.4.i = fpext half %0 to float + %1 = bitcast float %conv.i.4.i to i32 + %vecins.i.4.i = insertelement <8 x i32> undef, i32 %1, i32 4 + %2 = extractelement <16 x half> undef, i32 5 + %conv.i.5.i = fpext half %2 to float + %3 = bitcast float %conv.i.5.i to i32 + %vecins.i.5.i = insertelement <8 x i32> %vecins.i.4.i, i32 %3, i32 5 + ret void +} diff --git 
a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll index 46386e8b63e0..750a44736c97 100644 --- a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -7,8 +7,8 @@ target triple = "x86_64-apple-macosx10.8.0" define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { ; CHECK-LABEL: @simple_select( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 ; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 @@ -20,8 +20,8 @@ define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) ; CHECK-NEXT: ret <4 x float> [[RD]] ; ; ZEROTHRESH-LABEL: @simple_select( -; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer -; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b +; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer +; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]] ; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 ; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0 ; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 @@ -64,18 +64,18 @@ declare void @llvm.assume(i1) nounwind ; This entire tree is ephemeral, don't vectorize any of it. 
define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { ; CHECK-LABEL: @simple_select_eph( -; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0 -; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1 -; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2 -; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3 -; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0 -; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1 -; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2 -; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3 -; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0 -; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1 -; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2 -; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3 +; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0 +; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1 +; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2 +; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3 +; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3 +; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3 ; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0 ; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0 ; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0 @@ -100,18 +100,18 @@ define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> ; CHECK-NEXT: ret <4 x 
float> undef ; ; ZEROTHRESH-LABEL: @simple_select_eph( -; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0 -; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1 -; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2 -; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3 -; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0 -; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1 -; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2 -; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3 -; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0 -; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1 -; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2 -; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3 +; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0 +; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1 +; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2 +; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3 +; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 +; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 +; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2 +; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3 +; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 +; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 +; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2 +; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3 ; ZEROTHRESH-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0 ; ZEROTHRESH-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0 ; ZEROTHRESH-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0 @@ -175,8 +175,8 @@ define <4 x float> @simple_select_eph(<4 x 
float> %a, <4 x float> %b, <4 x i32> ; doesn't matter define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { ; CHECK-LABEL: @simple_select_insert_out_of_order( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 ; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 @@ -188,8 +188,8 @@ define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float ; CHECK-NEXT: ret <4 x float> [[RD]] ; ; ZEROTHRESH-LABEL: @simple_select_insert_out_of_order( -; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer -; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b +; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer +; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]] ; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 ; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2 ; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 @@ -233,8 +233,8 @@ declare void @f32_user(float) #0 ; Multiple users of the final constructed vector define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { ; CHECK-LABEL: @simple_select_users( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], 
zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 ; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 @@ -247,8 +247,8 @@ define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32 ; CHECK-NEXT: ret <4 x float> [[RD]] ; ; ZEROTHRESH-LABEL: @simple_select_users( -; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer -; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b +; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer +; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]] ; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 ; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0 ; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 @@ -291,18 +291,18 @@ define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32 ; Unused insertelement define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { ; CHECK-LABEL: @simple_select_no_users( -; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0 -; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1 -; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2 -; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3 -; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0 -; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1 -; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2 -; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3 -; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0 -; CHECK-NEXT: [[B1:%.*]] = extractelement <4 
x float> %b, i32 1 -; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2 -; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3 +; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0 +; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1 +; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2 +; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3 +; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3 +; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer @@ -330,18 +330,18 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x ; CHECK-NEXT: ret <4 x float> [[RD]] ; ; ZEROTHRESH-LABEL: @simple_select_no_users( -; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0 -; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1 -; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2 -; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3 -; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0 -; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1 -; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2 -; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3 -; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> 
%b, i32 0 -; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1 -; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2 -; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3 +; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0 +; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1 +; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2 +; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3 +; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 +; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 +; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2 +; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3 +; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 +; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 +; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2 +; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3 ; ZEROTHRESH-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0 ; ZEROTHRESH-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0 ; ZEROTHRESH-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0 @@ -387,25 +387,25 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x ; to do this backwards this backwards define <4 x i32> @reconstruct(<4 x i32> %c) #0 { ; CHECK-LABEL: @reconstruct( -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> %c, i32 0 -; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> %c, i32 1 -; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 2 -; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP3]], i32 2 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> %c, i32 3 -; CHECK-NEXT: 
[[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP4]], i32 3 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[C]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[C]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[C]], i32 0 +; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 +; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP3]], i32 1 +; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP2]], i32 2 +; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP1]], i32 3 ; CHECK-NEXT: ret <4 x i32> [[RD]] ; ; ZEROTHRESH-LABEL: @reconstruct( -; ZEROTHRESH-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> %c, i32 0 -; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0 -; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> %c, i32 1 -; ZEROTHRESH-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP2]], i32 1 -; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 2 -; ZEROTHRESH-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP3]], i32 2 -; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> %c, i32 3 -; ZEROTHRESH-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP4]], i32 3 +; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0 +; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1 +; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2 +; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3 +; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[C0]], i32 0 +; ZEROTHRESH-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[C1]], i32 1 +; ZEROTHRESH-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[C2]], i32 2 +; ZEROTHRESH-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[C3]], i32 3 ; ZEROTHRESH-NEXT: ret <4 x 
i32> [[RD]] ; %c0 = extractelement <4 x i32> %c, i32 0 @@ -421,8 +421,8 @@ define <4 x i32> @reconstruct(<4 x i32> %c) #0 { define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %c) #0 { ; CHECK-LABEL: @simple_select_v2( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> %c, zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x float> %a, <2 x float> %b +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[C:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 ; CHECK-NEXT: [[RA:%.*]] = insertelement <2 x float> undef, float [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 @@ -430,12 +430,12 @@ define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> % ; CHECK-NEXT: ret <2 x float> [[RB]] ; ; ZEROTHRESH-LABEL: @simple_select_v2( -; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <2 x i32> %c, i32 0 -; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <2 x i32> %c, i32 1 -; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <2 x float> %a, i32 0 -; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <2 x float> %a, i32 1 -; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <2 x float> %b, i32 0 -; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <2 x float> %b, i32 1 +; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <2 x i32> [[C:%.*]], i32 0 +; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <2 x i32> [[C]], i32 1 +; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <2 x float> [[A:%.*]], i32 0 +; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <2 x float> [[A]], i32 1 +; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <2 x float> [[B:%.*]], i32 0 +; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <2 x float> [[B]], i32 1 ; ZEROTHRESH-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0 ; ZEROTHRESH-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0 ; ZEROTHRESH-NEXT: [[S0:%.*]] = select i1 
[[CMP0]], float [[A0]], float [[B0]] @@ -464,12 +464,12 @@ define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> % ; (low cost threshold needed to force this to happen) define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { ; CHECK-LABEL: @simple_select_partial_vector( -; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0 -; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1 -; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0 -; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1 -; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0 -; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1 +; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0 +; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1 +; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 +; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer @@ -485,12 +485,12 @@ define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, ; CHECK-NEXT: ret <4 x float> [[RB]] ; ; ZEROTHRESH-LABEL: @simple_select_partial_vector( -; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0 -; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1 -; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0 -; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1 -; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0 -; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1 +; 
ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0 +; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1 +; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 +; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 +; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 +; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 ; ZEROTHRESH-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0 ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1 ; ZEROTHRESH-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer @@ -530,7 +530,7 @@ define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, ; must be rescheduled. The case here is from compiling Julia. define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @reschedule_extract( -; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0 ; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1 @@ -542,7 +542,7 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: ret <4 x float> [[V3]] ; ; ZEROTHRESH-LABEL: @reschedule_extract( -; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b +; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]] ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0 ; ZEROTHRESH-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0 ; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1 @@ -576,7 +576,7 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) { ; instructions that are erased. 
define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @take_credit( -; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0 ; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1 @@ -588,7 +588,7 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: ret <4 x float> [[V3]] ; ; ZEROTHRESH-LABEL: @take_credit( -; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b +; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]] ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0 ; ZEROTHRESH-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0 ; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1 @@ -622,10 +622,10 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) { define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) { ; CHECK-LABEL: @multi_tree( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double [[W:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[X:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Z:%.*]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], ; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> , [[TMP4]] 
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0 @@ -640,10 +640,10 @@ define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) { ; ; ZEROTHRESH-LABEL: @multi_tree( ; ZEROTHRESH-NEXT: entry: -; ZEROTHRESH-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0 -; ZEROTHRESH-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1 -; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2 -; ZEROTHRESH-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3 +; ZEROTHRESH-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double [[W:%.*]], i32 0 +; ZEROTHRESH-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[X:%.*]], i32 1 +; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 2 +; ZEROTHRESH-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Z:%.*]], i32 3 ; ZEROTHRESH-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], ; ZEROTHRESH-NEXT: [[TMP5:%.*]] = fmul <4 x double> , [[TMP4]] ; ZEROTHRESH-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0 @@ -675,7 +675,7 @@ entry: define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr #0 { ; CHECK-LABEL: @_vadd256( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x float> %a, %b +; CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0 ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1 @@ -696,7 +696,7 @@ define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr ; ; ZEROTHRESH-LABEL: @_vadd256( ; ZEROTHRESH-NEXT: entry: -; ZEROTHRESH-NEXT: [[TMP0:%.*]] = fadd <8 x float> %a, %b +; ZEROTHRESH-NEXT: [[TMP0:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]] ; ZEROTHRESH-NEXT: 
[[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0 ; ZEROTHRESH-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0 ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1 diff --git a/test/Transforms/SLPVectorizer/X86/insertvalue.ll b/test/Transforms/SLPVectorizer/X86/insertvalue.ll index 5884ee7a2675..1af11609fe6f 100644 --- a/test/Transforms/SLPVectorizer/X86/insertvalue.ll +++ b/test/Transforms/SLPVectorizer/X86/insertvalue.ll @@ -1,11 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s -; CHECK-LABEL: julia_2xdouble -; CHECK: load <2 x double> -; CHECK: load <2 x double> -; CHECK: fmul <2 x double> -; CHECK: fadd <2 x double> define void @julia_2xdouble([2 x double]* sret, [2 x double]*, [2 x double]*, [2 x double]*) { +; CHECK-LABEL: @julia_2xdouble( +; CHECK-NEXT: top: +; CHECK-NEXT: [[PX0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP2:%.*]], i64 0, i64 0 +; CHECK-NEXT: [[PY0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP3:%.*]], i64 0, i64 0 +; CHECK-NEXT: [[PX1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP2]], i64 0, i64 1 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[PX0]] to <2 x double>* +; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 4 +; CHECK-NEXT: [[PY1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP3]], i64 0, i64 1 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[PY0]] to <2 x double>* +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[PZ0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP1:%.*]], i64 0, i64 0 +; CHECK-NEXT: [[PZ1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP1]], i64 0, i64 1 +; 
CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[PZ0]] to <2 x double>* +; CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP8]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP11]], i32 0 +; CHECK-NEXT: [[I0:%.*]] = insertvalue [2 x double] undef, double [[TMP12]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x double> [[TMP11]], i32 1 +; CHECK-NEXT: [[I1:%.*]] = insertvalue [2 x double] [[I0]], double [[TMP13]], 1 +; CHECK-NEXT: store [2 x double] [[I1]], [2 x double]* [[TMP0:%.*]], align 4 +; CHECK-NEXT: ret void +; top: %px0 = getelementptr inbounds [2 x double], [2 x double]* %2, i64 0, i64 0 %x0 = load double, double* %px0, align 4 @@ -29,12 +48,40 @@ top: ret void } -; CHECK-LABEL: julia_4xfloat -; CHECK: load <4 x float> -; CHECK: load <4 x float> -; CHECK: fmul <4 x float> -; CHECK: fadd <4 x float> define void @julia_4xfloat([4 x float]* sret, [4 x float]*, [4 x float]*, [4 x float]*) { +; CHECK-LABEL: @julia_4xfloat( +; CHECK-NEXT: top: +; CHECK-NEXT: [[PX0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2:%.*]], i64 0, i64 0 +; CHECK-NEXT: [[PY0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3:%.*]], i64 0, i64 0 +; CHECK-NEXT: [[PX1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 1 +; CHECK-NEXT: [[PY1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 1 +; CHECK-NEXT: [[PX2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 2 +; CHECK-NEXT: [[PY2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 2 +; CHECK-NEXT: [[PX3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 3 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[PX0]] to <4 x float>* +; CHECK-NEXT: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4 +; CHECK-NEXT: [[PY3:%.*]] = getelementptr inbounds [4 x 
float], [4 x float]* [[TMP3]], i64 0, i64 3 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[PY0]] to <4 x float>* +; CHECK-NEXT: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[PZ0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1:%.*]], i64 0, i64 0 +; CHECK-NEXT: [[PZ1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 1 +; CHECK-NEXT: [[PZ2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 2 +; CHECK-NEXT: [[PZ3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 3 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[PZ0]] to <4 x float>* +; CHECK-NEXT: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x float> [[TMP8]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP11]], i32 0 +; CHECK-NEXT: [[I0:%.*]] = insertvalue [4 x float] undef, float [[TMP12]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP11]], i32 1 +; CHECK-NEXT: [[I1:%.*]] = insertvalue [4 x float] [[I0]], float [[TMP13]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[TMP11]], i32 2 +; CHECK-NEXT: [[I2:%.*]] = insertvalue [4 x float] [[I1]], float [[TMP14]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i32 3 +; CHECK-NEXT: [[I3:%.*]] = insertvalue [4 x float] [[I2]], float [[TMP15]], 3 +; CHECK-NEXT: store [4 x float] [[I3]], [4 x float]* [[TMP0:%.*]], align 4 +; CHECK-NEXT: ret void +; top: %px0 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0 %x0 = load float, float* %px0, align 4 @@ -76,9 +123,27 @@ top: ret void } -; CHECK-LABEL: julia_load_array_of_float -; CHECK: fsub <4 x float> define void @julia_load_array_of_float([4 x float]* %a, [4 x float]* %b, [4 x float]* %c) { +; CHECK-LABEL: @julia_load_array_of_float( +; CHECK-NEXT: top: +; CHECK-NEXT: 
[[TMP0:%.*]] = bitcast [4 x float]* [[A:%.*]] to <4 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 +; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x float], [4 x float]* [[A]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast [4 x float]* [[B:%.*]] to <4 x float>* +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 +; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x float], [4 x float]* [[B]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0 +; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x float] undef, float [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1 +; CHECK-NEXT: [[C_ARR1:%.*]] = insertvalue [4 x float] [[C_ARR0]], float [[TMP6]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2 +; CHECK-NEXT: [[C_ARR2:%.*]] = insertvalue [4 x float] [[C_ARR1]], float [[TMP7]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3 +; CHECK-NEXT: [[C_ARR3:%.*]] = insertvalue [4 x float] [[C_ARR2]], float [[TMP8]], 3 +; CHECK-NEXT: store [4 x float] [[C_ARR3]], [4 x float]* [[C:%.*]], align 4 +; CHECK-NEXT: ret void +; top: %a_arr = load [4 x float], [4 x float]* %a, align 4 %a0 = extractvalue [4 x float] %a_arr, 0 @@ -102,11 +167,27 @@ top: ret void } -; CHECK-LABEL: julia_load_array_of_i32 -; CHECK: load <4 x i32> -; CHECK: load <4 x i32> -; CHECK: sub <4 x i32> define void @julia_load_array_of_i32([4 x i32]* %a, [4 x i32]* %b, [4 x i32]* %c) { +; CHECK-LABEL: @julia_load_array_of_i32( +; CHECK-NEXT: top: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x i32]* [[A:%.*]] to <4 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x i32], [4 x i32]* [[A]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast [4 x i32]* [[B:%.*]] to <4 x i32>* +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* 
[[TMP2]], align 4 +; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x i32], [4 x i32]* [[B]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0 +; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x i32] undef, i32 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1 +; CHECK-NEXT: [[C_ARR1:%.*]] = insertvalue [4 x i32] [[C_ARR0]], i32 [[TMP6]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2 +; CHECK-NEXT: [[C_ARR2:%.*]] = insertvalue [4 x i32] [[C_ARR1]], i32 [[TMP7]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3 +; CHECK-NEXT: [[C_ARR3:%.*]] = insertvalue [4 x i32] [[C_ARR2]], i32 [[TMP8]], 3 +; CHECK-NEXT: store [4 x i32] [[C_ARR3]], [4 x i32]* [[C:%.*]], align 4 +; CHECK-NEXT: ret void +; top: %a_arr = load [4 x i32], [4 x i32]* %a, align 4 %a0 = extractvalue [4 x i32] %a_arr, 0 @@ -132,9 +213,30 @@ top: ; Almost identical to previous test, but for type that should NOT be vectorized. 
; -; CHECK-LABEL: julia_load_array_of_i16 -; CHECK-NOT: i2> define void @julia_load_array_of_i16([4 x i16]* %a, [4 x i16]* %b, [4 x i16]* %c) { +; CHECK-LABEL: @julia_load_array_of_i16( +; CHECK-NEXT: top: +; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x i16], [4 x i16]* [[A:%.*]], align 4 +; CHECK-NEXT: [[A0:%.*]] = extractvalue [4 x i16] [[A_ARR]], 0 +; CHECK-NEXT: [[A2:%.*]] = extractvalue [4 x i16] [[A_ARR]], 2 +; CHECK-NEXT: [[A1:%.*]] = extractvalue [4 x i16] [[A_ARR]], 1 +; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x i16], [4 x i16]* [[B:%.*]], align 4 +; CHECK-NEXT: [[B0:%.*]] = extractvalue [4 x i16] [[B_ARR]], 0 +; CHECK-NEXT: [[B2:%.*]] = extractvalue [4 x i16] [[B_ARR]], 2 +; CHECK-NEXT: [[B1:%.*]] = extractvalue [4 x i16] [[B_ARR]], 1 +; CHECK-NEXT: [[A3:%.*]] = extractvalue [4 x i16] [[A_ARR]], 3 +; CHECK-NEXT: [[C1:%.*]] = sub i16 [[A1]], [[B1]] +; CHECK-NEXT: [[B3:%.*]] = extractvalue [4 x i16] [[B_ARR]], 3 +; CHECK-NEXT: [[C0:%.*]] = sub i16 [[A0]], [[B0]] +; CHECK-NEXT: [[C2:%.*]] = sub i16 [[A2]], [[B2]] +; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x i16] undef, i16 [[C0]], 0 +; CHECK-NEXT: [[C_ARR1:%.*]] = insertvalue [4 x i16] [[C_ARR0]], i16 [[C1]], 1 +; CHECK-NEXT: [[C3:%.*]] = sub i16 [[A3]], [[B3]] +; CHECK-NEXT: [[C_ARR2:%.*]] = insertvalue [4 x i16] [[C_ARR1]], i16 [[C2]], 2 +; CHECK-NEXT: [[C_ARR3:%.*]] = insertvalue [4 x i16] [[C_ARR2]], i16 [[C3]], 3 +; CHECK-NEXT: store [4 x i16] [[C_ARR3]], [4 x i16]* [[C:%.*]], align 4 +; CHECK-NEXT: ret void +; top: %a_arr = load [4 x i16], [4 x i16]* %a, align 4 %a0 = extractvalue [4 x i16] %a_arr, 0 @@ -160,11 +262,27 @@ top: %pseudovec = type { float, float, float, float } -; CHECK-LABEL: julia_load_struct_of_float -; CHECK: load <4 x float> -; CHECK: load <4 x float> -; CHECK: fsub <4 x float> define void @julia_load_struct_of_float(%pseudovec* %a, %pseudovec* %b, %pseudovec* %c) { +; CHECK-LABEL: @julia_load_struct_of_float( +; CHECK-NEXT: top: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast %pseudovec* [[A:%.*]] 
to <4 x float>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4 +; CHECK-NEXT: [[A_STRUCT:%.*]] = load [[PSEUDOVEC:%.*]], %pseudovec* [[A]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %pseudovec* [[B:%.*]] to <4 x float>* +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 +; CHECK-NEXT: [[B_STRUCT:%.*]] = load [[PSEUDOVEC]], %pseudovec* [[B]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0 +; CHECK-NEXT: [[C_STRUCT0:%.*]] = insertvalue [[PSEUDOVEC]] undef, float [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1 +; CHECK-NEXT: [[C_STRUCT1:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct0, float [[TMP6]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2 +; CHECK-NEXT: [[C_STRUCT2:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct1, float [[TMP7]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3 +; CHECK-NEXT: [[C_STRUCT3:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct2, float [[TMP8]], 3 +; CHECK-NEXT: store [[PSEUDOVEC]] %c_struct3, %pseudovec* [[C:%.*]], align 4 +; CHECK-NEXT: ret void +; top: %a_struct = load %pseudovec, %pseudovec* %a, align 4 %a0 = extractvalue %pseudovec %a_struct, 0 diff --git a/test/Transforms/SLPVectorizer/X86/value-bug.ll b/test/Transforms/SLPVectorizer/X86/value-bug.ll index 64d2ae1c7d79..7558c724a15d 100644 --- a/test/Transforms/SLPVectorizer/X86/value-bug.ll +++ b/test/Transforms/SLPVectorizer/X86/value-bug.ll @@ -1,15 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -slp-vectorizer < %s -S -mtriple="x86_64-grtev3-linux-gnu" -mcpu=corei7-avx | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-grtev3-linux-gnu" ; We used to crash on this example because we were building a constant ; expression during vectorization 
and the vectorizer expects instructions ; as elements of the vectorized tree. -; CHECK-LABEL: @test ; PR19621 define void @test() { +; CHECK-LABEL: @test( +; CHECK-NEXT: bb279: +; CHECK-NEXT: br label [[BB283:%.*]] +; CHECK: bb283: +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ [[TMP13:%.*]], [[EXIT]] ] +; CHECK-NEXT: br label [[BB284:%.*]] +; CHECK: bb284: +; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double> +; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef +; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef +; CHECK-NEXT: br label [[BB21_I:%.*]] +; CHECK: bb21.i: +; CHECK-NEXT: br i1 undef, label [[BB22_I:%.*]], label [[EXIT]] +; CHECK: bb22.i: +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]] +; CHECK-NEXT: br label [[BB32_I:%.*]] +; CHECK: bb32.i: +; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x double> [ [[TMP5]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ] +; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]] +; CHECK: exit: +; CHECK-NEXT: [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double> +; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> , [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> undef, [[TMP9]] +; CHECK-NEXT: [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float> +; CHECK-NEXT: [[TMP317:%.*]] = fptrunc double undef to float +; CHECK-NEXT: [[TMP319:%.*]] = fptrunc double undef to float +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP317]], i32 0 +; CHECK-NEXT: [[TMP13]] = insertelement <2 x float> [[TMP12]], float [[TMP319]], i32 1 +; CHECK-NEXT: br label [[BB283]] +; bb279: br label %bb283 @@ -62,6 +93,12 @@ exit: ; vectorizer starts at the type (%t2, %t3) and wil constant fold the tree. ; The code that handles insertelement instructions must handle this. 
define <4 x double> @constant_folding() { +; CHECK-LABEL: @constant_folding( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double 1.000000e+00, i32 1 +; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x double> [[I1]], double 2.000000e+00, i32 0 +; CHECK-NEXT: ret <4 x double> [[I2]] +; entry: %t0 = fadd double 1.000000e+00 , 0.000000e+00 %t1 = fadd double 1.000000e+00 , 1.000000e+00 @@ -71,10 +108,3 @@ entry: %i2 = insertelement <4 x double> %i1, double %t3, i32 0 ret <4 x double> %i2 } - -; CHECK-LABEL: @constant_folding -; CHECK: %[[V0:.+]] = extractelement <2 x double> , i32 0 -; CHECK: %[[V1:.+]] = insertelement <4 x double> undef, double %[[V0]], i32 1 -; CHECK: %[[V2:.+]] = extractelement <2 x double> , i32 1 -; CHECK: %[[V3:.+]] = insertelement <4 x double> %[[V1]], double %[[V2]], i32 0 -; CHECK: ret <4 x double> %[[V3]] diff --git a/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll b/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll new file mode 100644 index 000000000000..e9c54151cf29 --- /dev/null +++ b/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll @@ -0,0 +1,77 @@ +; XFAIL: * +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -structurizecfg -verify-region-info %s + +; FIXME: Merge into backedge-id-bug +; Variant which has an issue with region construction + +define amdgpu_kernel void @loop_backedge_misidentified_alt(i32 addrspace(1)* %arg0) #0 { +entry: + %tmp = load volatile <2 x i32>, <2 x i32> addrspace(1)* undef, align 16 + %load1 = load volatile <2 x float>, <2 x float> addrspace(1)* undef + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i32 %tid + %i.initial = load volatile i32, i32 addrspace(1)* %gep, align 4 + br label %LOOP.HEADER + +LOOP.HEADER: + %i = phi i32 [ %i.final, %END_ELSE_BLOCK ], [ %i.initial, %entry ] + call void asm sideeffect "s_nop 0x100b ; loop $0 ", "r,~{memory}"(i32 %i) #0 + %tmp12 = zext i32 %i to 
i64 + %tmp13 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 %tmp12 + %tmp14 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp13, align 16 + %tmp15 = extractelement <4 x i32> %tmp14, i64 0 + %tmp16 = and i32 %tmp15, 65535 + %tmp17 = icmp eq i32 %tmp16, 1 + br i1 %tmp17, label %bb18, label %bb62 + +bb18: + %tmp19 = extractelement <2 x i32> %tmp, i64 0 + %tmp22 = lshr i32 %tmp19, 16 + %tmp24 = urem i32 %tmp22, 52 + %tmp25 = mul nuw nsw i32 %tmp24, 52 + br label %INNER_LOOP + +INNER_LOOP: + %inner.loop.j = phi i32 [ %tmp25, %bb18 ], [ %inner.loop.j.inc, %INNER_LOOP ] + call void asm sideeffect "; inner loop body", ""() #0 + %inner.loop.j.inc = add nsw i32 %inner.loop.j, 1 + %inner.loop.cmp = icmp eq i32 %inner.loop.j, 0 + br i1 %inner.loop.cmp, label %INNER_LOOP_BREAK, label %INNER_LOOP + +INNER_LOOP_BREAK: + %tmp59 = extractelement <4 x i32> %tmp14, i64 2 + call void asm sideeffect "s_nop 23 ", "~{memory}"() #0 + br label %END_ELSE_BLOCK + +bb62: + %load13 = icmp ult i32 %tmp16, 271 + ;br i1 %load13, label %bb64, label %INCREMENT_I + ; branching directly to the return avoids the bug + br i1 %load13, label %RETURN, label %INCREMENT_I + + +bb64: + call void asm sideeffect "s_nop 42", "~{memory}"() #0 + br label %RETURN + +INCREMENT_I: + %inc.i = add i32 %i, 1 + call void asm sideeffect "s_nop 0x1336 ; increment $0", "v,~{memory}"(i32 %inc.i) #0 + br label %END_ELSE_BLOCK + +END_ELSE_BLOCK: + %i.final = phi i32 [ %tmp59, %INNER_LOOP_BREAK ], [ %inc.i, %INCREMENT_I ] + call void asm sideeffect "s_nop 0x1337 ; end else block $0", "v,~{memory}"(i32 %i.final) #0 + %cmp.end.else.block = icmp eq i32 %i.final, -1 + br i1 %cmp.end.else.block, label %RETURN, label %LOOP.HEADER + +RETURN: + call void asm sideeffect "s_nop 0x99 ; ClosureEval return", "~{memory}"() #0 + store volatile <2 x float> %load1, <2 x float> addrspace(1)* undef, align 8 + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #1 + +attributes #0 = { convergent nounwind } +attributes #1 = 
{ convergent nounwind readnone } diff --git a/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll b/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll new file mode 100644 index 000000000000..9cddffdd1795 --- /dev/null +++ b/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug.ll @@ -0,0 +1,163 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -structurizecfg %s | FileCheck %s + +; StructurizeCFG::orderNodes used an arbitrary and nonsensical sorting +; function which broke the basic backedge identification algorithm. It +; would use RPO order, but then do a weird partial sort by the loop +; depth assuming blocks are sorted by loop. However a block can appear +; in between blocks of a loop that is not part of a loop, breaking the +; assumption of the sort. +; +; The collectInfos must be done in RPO order. The actual +; structurization order I think is less important, but unless the loop +; headers are identified in RPO order, it finds the wrong set of back +; edges. 
+ +define amdgpu_kernel void @loop_backedge_misidentified(i32 addrspace(1)* %arg0) #0 { +; CHECK-LABEL: @loop_backedge_misidentified( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP:%.*]] = load volatile <2 x i32>, <2 x i32> addrspace(1)* undef, align 16 +; CHECK-NEXT: [[LOAD1:%.*]] = load volatile <2 x float>, <2 x float> addrspace(1)* undef +; CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG0:%.*]], i32 [[TID]] +; CHECK-NEXT: [[I_INITIAL:%.*]] = load volatile i32, i32 addrspace(1)* [[GEP]], align 4 +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: LOOP.HEADER: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[FLOW4:%.*]] ] +; CHECK-NEXT: call void asm sideeffect "s_nop 0x100b +; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP13]], align 16 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP14]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 65535 +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TMP17]], true +; CHECK-NEXT: br i1 [[TMP0]], label [[BB62:%.*]], label [[FLOW:%.*]] +; CHECK: Flow2: +; CHECK-NEXT: br label [[FLOW]] +; CHECK: bb18: +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = lshr i32 [[TMP19]], 16 +; CHECK-NEXT: [[TMP24:%.*]] = urem i32 [[TMP22]], 52 +; CHECK-NEXT: [[TMP25:%.*]] = mul nuw nsw i32 [[TMP24]], 52 +; CHECK-NEXT: br label [[INNER_LOOP:%.*]] +; CHECK: Flow3: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP7:%.*]], [[FLOW]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP8:%.*]], [[FLOW]] ] +; CHECK-NEXT: br i1 [[TMP2]], label 
[[END_ELSE_BLOCK:%.*]], label [[FLOW4]] +; CHECK: INNER_LOOP: +; CHECK-NEXT: [[INNER_LOOP_J:%.*]] = phi i32 [ [[INNER_LOOP_J_INC:%.*]], [[INNER_LOOP]] ], [ [[TMP25]], [[BB18:%.*]] ] +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: [[INNER_LOOP_J_INC]] = add nsw i32 [[INNER_LOOP_J]], 1 +; CHECK-NEXT: [[INNER_LOOP_CMP:%.*]] = icmp eq i32 [[INNER_LOOP_J]], 0 +; CHECK-NEXT: br i1 [[INNER_LOOP_CMP]], label [[INNER_LOOP_BREAK]], label [[INNER_LOOP]] +; CHECK: INNER_LOOP_BREAK: +; CHECK-NEXT: [[TMP59]] = extractelement <4 x i32> [[TMP14]], i64 2 +; CHECK-NEXT: call void asm sideeffect "s_nop 23 ", "~{memory}"() #0 +; CHECK-NEXT: br label [[FLOW3:%.*]] +; CHECK: bb62: +; CHECK-NEXT: [[LOAD13:%.*]] = icmp ult i32 [[TMP16]], 271 +; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[LOAD13]], true +; CHECK-NEXT: br i1 [[TMP3]], label [[INCREMENT_I:%.*]], label [[FLOW1:%.*]] +; CHECK: Flow1: +; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[INC_I:%.*]], [[INCREMENT_I]] ], [ undef, [[BB62]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ true, [[INCREMENT_I]] ], [ false, [[BB62]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ false, [[INCREMENT_I]] ], [ true, [[BB62]] ] +; CHECK-NEXT: br i1 [[TMP6]], label [[BB64:%.*]], label [[FLOW2:%.*]] +; CHECK: bb64: +; CHECK-NEXT: call void asm sideeffect "s_nop 42", "~{memory}"() #0 +; CHECK-NEXT: br label [[FLOW2]] +; CHECK: Flow: +; CHECK-NEXT: [[TMP7]] = phi i32 [ [[TMP4]], [[FLOW2]] ], [ undef, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP5]], [[FLOW2]] ], [ false, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ false, [[FLOW2]] ], [ true, [[LOOP_HEADER]] ] +; CHECK-NEXT: br i1 [[TMP9]], label [[BB18]], label [[FLOW3]] +; CHECK: INCREMENT_I: +; CHECK-NEXT: [[INC_I]] = add i32 [[I]], 1 +; CHECK-NEXT: call void asm sideeffect "s_nop 0x1336 +; CHECK-NEXT: br label [[FLOW1]] +; CHECK: END_ELSE_BLOCK: +; CHECK-NEXT: [[I_FINAL:%.*]] = phi i32 [ [[TMP1]], [[FLOW3]] ] +; CHECK-NEXT: call void asm sideeffect "s_nop 0x1337 +; CHECK-NEXT: 
[[CMP_END_ELSE_BLOCK:%.*]] = icmp eq i32 [[I_FINAL]], -1 +; CHECK-NEXT: br label [[FLOW4]] +; CHECK: Flow4: +; CHECK-NEXT: [[TMP10]] = phi i32 [ [[I_FINAL]], [[END_ELSE_BLOCK]] ], [ undef, [[FLOW3]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW3]] ] +; CHECK-NEXT: br i1 [[TMP11]], label [[RETURN:%.*]], label [[LOOP_HEADER]] +; CHECK: RETURN: +; CHECK-NEXT: call void asm sideeffect "s_nop 0x99 +; CHECK-NEXT: store volatile <2 x float> [[LOAD1]], <2 x float> addrspace(1)* undef, align 8 +; CHECK-NEXT: ret void +; +entry: + %tmp = load volatile <2 x i32>, <2 x i32> addrspace(1)* undef, align 16 + %load1 = load volatile <2 x float>, <2 x float> addrspace(1)* undef + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i32 %tid + %i.initial = load volatile i32, i32 addrspace(1)* %gep, align 4 + br label %LOOP.HEADER + +LOOP.HEADER: + %i = phi i32 [ %i.final, %END_ELSE_BLOCK ], [ %i.initial, %entry ] + call void asm sideeffect "s_nop 0x100b ; loop $0 ", "r,~{memory}"(i32 %i) #0 + %tmp12 = zext i32 %i to i64 + %tmp13 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 %tmp12 + %tmp14 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp13, align 16 + %tmp15 = extractelement <4 x i32> %tmp14, i64 0 + %tmp16 = and i32 %tmp15, 65535 + %tmp17 = icmp eq i32 %tmp16, 1 + br i1 %tmp17, label %bb18, label %bb62 + +bb18: + %tmp19 = extractelement <2 x i32> %tmp, i64 0 + %tmp22 = lshr i32 %tmp19, 16 + %tmp24 = urem i32 %tmp22, 52 + %tmp25 = mul nuw nsw i32 %tmp24, 52 + br label %INNER_LOOP + +INNER_LOOP: + %inner.loop.j = phi i32 [ %tmp25, %bb18 ], [ %inner.loop.j.inc, %INNER_LOOP ] + call void asm sideeffect "; inner loop body", ""() #0 + %inner.loop.j.inc = add nsw i32 %inner.loop.j, 1 + %inner.loop.cmp = icmp eq i32 %inner.loop.j, 0 + br i1 %inner.loop.cmp, label %INNER_LOOP_BREAK, label %INNER_LOOP + +INNER_LOOP_BREAK: + %tmp59 = extractelement <4 x i32> 
%tmp14, i64 2 + call void asm sideeffect "s_nop 23 ", "~{memory}"() #0 + br label %END_ELSE_BLOCK + +bb62: + %load13 = icmp ult i32 %tmp16, 271 + br i1 %load13, label %bb64, label %INCREMENT_I + +bb64: + call void asm sideeffect "s_nop 42", "~{memory}"() #0 + br label %RETURN + +INCREMENT_I: + %inc.i = add i32 %i, 1 + call void asm sideeffect "s_nop 0x1336 ; increment $0", "v,~{memory}"(i32 %inc.i) #0 + br label %END_ELSE_BLOCK + +END_ELSE_BLOCK: + %i.final = phi i32 [ %tmp59, %INNER_LOOP_BREAK ], [ %inc.i, %INCREMENT_I ] + call void asm sideeffect "s_nop 0x1337 ; end else block $0", "v,~{memory}"(i32 %i.final) #0 + %cmp.end.else.block = icmp eq i32 %i.final, -1 + br i1 %cmp.end.else.block, label %RETURN, label %LOOP.HEADER + +RETURN: + call void asm sideeffect "s_nop 0x99 ; ClosureEval return", "~{memory}"() #0 + store volatile <2 x float> %load1, <2 x float> addrspace(1)* undef, align 8 + ret void +} + +; The same function, except break to return block goes directly to the +; return, which managed to hide the bug. 
+; FIXME: Merge variant from backedge-id-bug-xfail + +declare i32 @llvm.amdgcn.workitem.id.x() #1 + +attributes #0 = { convergent nounwind } +attributes #1 = { convergent nounwind readnone } diff --git a/test/Transforms/StructurizeCFG/AMDGPU/lit.local.cfg b/test/Transforms/StructurizeCFG/AMDGPU/lit.local.cfg new file mode 100644 index 000000000000..2a665f06be72 --- /dev/null +++ b/test/Transforms/StructurizeCFG/AMDGPU/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AMDGPU' in config.root.targets: + config.unsupported = True diff --git a/test/Transforms/StructurizeCFG/nested-loop-order.ll b/test/Transforms/StructurizeCFG/nested-loop-order.ll index 58634d0d37db..7b5bd5acb629 100644 --- a/test/Transforms/StructurizeCFG/nested-loop-order.ll +++ b/test/Transforms/StructurizeCFG/nested-loop-order.ll @@ -1,32 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -structurizecfg %s -o - | FileCheck %s define void @main(float addrspace(1)* %out) { - -; CHECK: main_body: -; CHECK: br label %LOOP.outer +; CHECK-LABEL: @main( +; CHECK-NEXT: main_body: +; CHECK-NEXT: br label [[LOOP_OUTER:%.*]] +; CHECK: LOOP.outer: +; CHECK-NEXT: [[TEMP8_0_PH:%.*]] = phi float [ 0.000000e+00, [[MAIN_BODY:%.*]] ], [ [[TMP13:%.*]], [[FLOW3:%.*]] ] +; CHECK-NEXT: [[TEMP4_0_PH:%.*]] = phi i32 [ 0, [[MAIN_BODY]] ], [ [[TMP12:%.*]], [[FLOW3]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: LOOP: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ undef, [[LOOP_OUTER]] ], [ [[TMP12]], [[FLOW:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi float [ undef, [[LOOP_OUTER]] ], [ [[TMP13]], [[FLOW]] ] +; CHECK-NEXT: [[TEMP4_0:%.*]] = phi i32 [ [[TEMP4_0_PH]], [[LOOP_OUTER]] ], [ [[TMP15:%.*]], [[FLOW]] ] +; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TEMP4_0]], 1 +; CHECK-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[TMP22]], true +; CHECK-NEXT: br i1 [[TMP2]], label [[ENDIF:%.*]], label [[FLOW]] +; CHECK: Flow2: +; CHECK-NEXT: [[TMP3:%.*]] = phi float [ 
[[TEMP8_0_PH]], [[IF29:%.*]] ], [ [[TMP9:%.*]], [[FLOW1:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP20]], [[IF29]] ], [ undef, [[FLOW1]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[TMP32:%.*]], [[IF29]] ], [ true, [[FLOW1]] ] +; CHECK-NEXT: br label [[FLOW]] +; CHECK: Flow3: +; CHECK-NEXT: br i1 [[TMP16:%.*]], label [[ENDLOOP:%.*]], label [[LOOP_OUTER]] +; CHECK: ENDLOOP: +; CHECK-NEXT: [[TEMP8_1:%.*]] = phi float [ [[TMP14:%.*]], [[FLOW3]] ] +; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[TMP20]], 3 +; CHECK-NEXT: [[DOT45:%.*]] = select i1 [[TMP23]], float 0.000000e+00, float 1.000000e+00 +; CHECK-NEXT: store float [[DOT45]], float addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: ret void +; CHECK: ENDIF: +; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP20]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[TMP31]], true +; CHECK-NEXT: br i1 [[TMP6]], label [[ENDIF28:%.*]], label [[FLOW1]] +; CHECK: Flow1: +; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP20]], [[ENDIF28]] ], [ [[TMP0]], [[ENDIF]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi float [ [[TMP35:%.*]], [[ENDIF28]] ], [ [[TMP1]], [[ENDIF]] ] +; CHECK-NEXT: [[TMP9]] = phi float [ [[TMP35]], [[ENDIF28]] ], [ [[TEMP8_0_PH]], [[ENDIF]] ] +; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ [[TMP36:%.*]], [[ENDIF28]] ], [ true, [[ENDIF]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ false, [[ENDIF28]] ], [ true, [[ENDIF]] ] +; CHECK-NEXT: br i1 [[TMP11]], label [[IF29]], label [[FLOW2:%.*]] +; CHECK: IF29: +; CHECK-NEXT: [[TMP32]] = icmp sgt i32 [[TMP20]], 2 +; CHECK-NEXT: br label [[FLOW2]] +; CHECK: Flow: +; CHECK-NEXT: [[TMP12]] = phi i32 [ [[TMP7]], [[FLOW2]] ], [ [[TMP0]], [[LOOP]] ] +; CHECK-NEXT: [[TMP13]] = phi float [ [[TMP8]], [[FLOW2]] ], [ [[TMP1]], [[LOOP]] ] +; CHECK-NEXT: [[TMP14]] = phi float [ [[TMP3]], [[FLOW2]] ], [ [[TEMP8_0_PH]], [[LOOP]] ] +; CHECK-NEXT: [[TMP15]] = phi i32 [ [[TMP4]], [[FLOW2]] ], [ undef, [[LOOP]] ] +; CHECK-NEXT: [[TMP16]] = phi i1 [ [[TMP10]], [[FLOW2]] ], [ true, [[LOOP]] ] +; CHECK-NEXT: [[TMP17:%.*]] = 
phi i1 [ [[TMP5]], [[FLOW2]] ], [ true, [[LOOP]] ] +; CHECK-NEXT: br i1 [[TMP17]], label [[FLOW3]], label [[LOOP]] +; CHECK: ENDIF28: +; CHECK-NEXT: [[TMP35]] = fadd float [[TEMP8_0_PH]], 1.000000e+00 +; CHECK-NEXT: [[TMP36]] = icmp sgt i32 [[TMP20]], 2 +; CHECK-NEXT: br label [[FLOW1]] +; main_body: br label %LOOP.outer -; CHECK: LOOP.outer: -; CHECK: br label %LOOP LOOP.outer: ; preds = %ENDIF28, %main_body %temp8.0.ph = phi float [ 0.000000e+00, %main_body ], [ %tmp35, %ENDIF28 ] %temp4.0.ph = phi i32 [ 0, %main_body ], [ %tmp20, %ENDIF28 ] br label %LOOP -; CHECK: LOOP: -; br i1 %{{[0-9]+}}, label %ENDIF, label %Flow LOOP: ; preds = %IF29, %LOOP.outer %temp4.0 = phi i32 [ %temp4.0.ph, %LOOP.outer ], [ %tmp20, %IF29 ] %tmp20 = add i32 %temp4.0, 1 %tmp22 = icmp sgt i32 %tmp20, 3 br i1 %tmp22, label %ENDLOOP, label %ENDIF -; CHECK: Flow3 -; CHECK: br i1 %{{[0-9]+}}, label %ENDLOOP, label %LOOP.outer - -; CHECK: ENDLOOP: -; CHECK: ret void ENDLOOP: ; preds = %ENDIF28, %IF29, %LOOP %temp8.1 = phi float [ %temp8.0.ph, %LOOP ], [ %temp8.0.ph, %IF29 ], [ %tmp35, %ENDIF28 ] %tmp23 = icmp eq i32 %tmp20, 3 @@ -34,29 +78,14 @@ ENDLOOP: ; preds = %ENDIF28, %IF29, %LO store float %.45, float addrspace(1)* %out ret void -; CHECK: ENDIF: -; CHECK: br i1 %tmp31, label %IF29, label %Flow1 ENDIF: ; preds = %LOOP %tmp31 = icmp sgt i32 %tmp20, 1 br i1 %tmp31, label %IF29, label %ENDIF28 -; CHECK: Flow: -; CHECK: br i1 %{{[0-9]+}}, label %Flow2, label %LOOP - -; CHECK: IF29: -; CHECK: br label %Flow1 IF29: ; preds = %ENDIF %tmp32 = icmp sgt i32 %tmp20, 2 br i1 %tmp32, label %ENDLOOP, label %LOOP -; CHECK: Flow1: -; CHECK: br label %Flow - -; CHECK: Flow2: -; CHECK: br i1 %{{[0-9]+}}, label %ENDIF28, label %Flow3 - -; CHECK: ENDIF28: -; CHECK: br label %Flow3 ENDIF28: ; preds = %ENDIF %tmp35 = fadd float %temp8.0.ph, 1.0 %tmp36 = icmp sgt i32 %tmp20, 2 diff --git a/test/tools/llvm-readobj/macho-needed-libs.test b/test/tools/llvm-readobj/macho-needed-libs.test new file mode 100644 
index 000000000000..22e6948e758f --- /dev/null +++ b/test/tools/llvm-readobj/macho-needed-libs.test @@ -0,0 +1,26 @@ +# RUN: yaml2obj %s -o %t.o +# RUN: llvm-readobj -needed-libs %t.o | FileCheck %s + +# CHECK: NeededLibraries [ +# CHECK-NEXT: /usr/lib/libSystem.B.dylib +# CHECK-NEXT: ] + +!mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x00000001 + ncmds: 1 + sizeofcmds: 56 + flags: 0x00002000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 81985536 + compatibility_version: 65536 + PayloadString: /usr/lib/libSystem.B.dylib diff --git a/tools/llvm-readobj/MachODumper.cpp b/tools/llvm-readobj/MachODumper.cpp index 39e909279937..64178d7b33ad 100644 --- a/tools/llvm-readobj/MachODumper.cpp +++ b/tools/llvm-readobj/MachODumper.cpp @@ -39,6 +39,8 @@ class MachODumper : public ObjDumper { void printUnwindInfo() override; void printStackMap() const override; + void printNeededLibraries() override; + // MachO-specific. 
void printMachODataInCode() override; void printMachOVersionMin() override; @@ -675,6 +677,34 @@ void MachODumper::printStackMap() const { StackMapV2Parser(StackMapContentsArray)); } +void MachODumper::printNeededLibraries() { + ListScope D(W, "NeededLibraries"); + + using LibsTy = std::vector; + LibsTy Libs; + + for (const auto &Command : Obj->load_commands()) { + if (Command.C.cmd == MachO::LC_LOAD_DYLIB || + Command.C.cmd == MachO::LC_ID_DYLIB || + Command.C.cmd == MachO::LC_LOAD_WEAK_DYLIB || + Command.C.cmd == MachO::LC_REEXPORT_DYLIB || + Command.C.cmd == MachO::LC_LAZY_LOAD_DYLIB || + Command.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB) { + MachO::dylib_command Dl = Obj->getDylibIDLoadCommand(Command); + if (Dl.dylib.name < Dl.cmdsize) { + auto *P = static_cast(Command.Ptr) + Dl.dylib.name; + Libs.push_back(P); + } + } + } + + std::stable_sort(Libs.begin(), Libs.end()); + + for (const auto &L : Libs) { + outs() << " " << L << "\n"; + } +} + void MachODumper::printMachODataInCode() { for (const auto &Load : Obj->load_commands()) { if (Load.C.cmd == MachO::LC_DATA_IN_CODE) { diff --git a/unittests/IR/DominatorTreeBatchUpdatesTest.cpp b/unittests/IR/DominatorTreeBatchUpdatesTest.cpp index 4ad1f69030c1..e362afd84048 100644 --- a/unittests/IR/DominatorTreeBatchUpdatesTest.cpp +++ b/unittests/IR/DominatorTreeBatchUpdatesTest.cpp @@ -258,3 +258,98 @@ TEST(DominatorTreeBatchUpdates, InsertDeleteExhaustive) { EXPECT_TRUE(PDT.verify()); } } + +// These are some odd flowgraphs, usually generated from csmith cases, +// which are difficult on post dom trees. 
+TEST(DominatorTreeBatchUpdates, InfiniteLoop) { + std::vector Arcs = { + {"1", "2"}, + {"2", "3"}, + {"3", "6"}, {"3", "5"}, + {"4", "5"}, + {"5", "2"}, + {"6", "3"}, {"6", "4"}}; + + // SplitBlock on 3 -> 5 + std::vector Updates = { + {CFGInsert, {"N", "5"}}, {CFGInsert, {"3", "N"}}, {CFGDelete, {"3", "5"}}}; + + CFGHolder Holder; + CFGBuilder B(Holder.F, Arcs, Updates); + DominatorTree DT(*Holder.F); + EXPECT_TRUE(DT.verify()); + PostDomTree PDT(*Holder.F); + EXPECT_TRUE(PDT.verify()); + + while (B.applyUpdate()) + ; + + auto DomUpdates = ToDomUpdates(B, Updates); + DT.applyUpdates(DomUpdates); + EXPECT_TRUE(DT.verify()); + PDT.applyUpdates(DomUpdates); + EXPECT_TRUE(PDT.verify()); +} + +TEST(DominatorTreeBatchUpdates, DeadBlocks) { + std::vector Arcs = { + {"1", "2"}, + {"2", "3"}, + {"3", "4"}, {"3", "7"}, + {"4", "4"}, + {"5", "6"}, {"5", "7"}, + {"6", "7"}, + {"7", "2"}, {"7", "8"}}; + + // Remove dead 5 and 7, + // plus SplitBlock on 7 -> 8 + std::vector Updates = { + {CFGDelete, {"6", "7"}}, {CFGDelete, {"5", "7"}}, {CFGDelete, {"5", "6"}}, + {CFGInsert, {"N", "8"}}, {CFGInsert, {"7", "N"}}, {CFGDelete, {"7", "8"}}}; + + CFGHolder Holder; + CFGBuilder B(Holder.F, Arcs, Updates); + DominatorTree DT(*Holder.F); + EXPECT_TRUE(DT.verify()); + PostDomTree PDT(*Holder.F); + EXPECT_TRUE(PDT.verify()); + + while (B.applyUpdate()) + ; + + auto DomUpdates = ToDomUpdates(B, Updates); + DT.applyUpdates(DomUpdates); + EXPECT_TRUE(DT.verify()); + PDT.applyUpdates(DomUpdates); + EXPECT_TRUE(PDT.verify()); +} + +TEST(DominatorTreeBatchUpdates, InfiniteLoop2) { + std::vector Arcs = { + {"1", "2"}, + {"2", "6"}, {"2", "3"}, + {"3", "4"}, + {"4", "5"}, {"4", "6"}, + {"5", "4"}, + {"6", "2"}}; + + // SplitBlock on 4 -> 6 + std::vector Updates = { + {CFGInsert, {"N", "6"}}, {CFGInsert, {"4", "N"}}, {CFGDelete, {"4", "6"}}}; + + CFGHolder Holder; + CFGBuilder B(Holder.F, Arcs, Updates); + DominatorTree DT(*Holder.F); + EXPECT_TRUE(DT.verify()); + PostDomTree PDT(*Holder.F); + 
EXPECT_TRUE(PDT.verify()); + + while (B.applyUpdate()) + ; + + auto DomUpdates = ToDomUpdates(B, Updates); + DT.applyUpdates(DomUpdates); + EXPECT_TRUE(DT.verify()); + PDT.applyUpdates(DomUpdates); + EXPECT_TRUE(PDT.verify()); +} diff --git a/unittests/IR/DominatorTreeTest.cpp b/unittests/IR/DominatorTreeTest.cpp index bf5aced49289..4666f93da2d9 100644 --- a/unittests/IR/DominatorTreeTest.cpp +++ b/unittests/IR/DominatorTreeTest.cpp @@ -925,3 +925,28 @@ TEST(DominatorTree, InsertDeleteExhaustive) { } } } + +TEST(DominatorTree, InsertIntoIrreducible) { + std::vector Arcs = { + {"0", "1"}, + {"1", "27"}, {"1", "7"}, + {"10", "18"}, + {"13", "10"}, + {"18", "13"}, {"18", "23"}, + {"23", "13"}, {"23", "24"}, + {"24", "1"}, {"24", "18"}, + {"27", "24"}}; + + CFGHolder Holder; + CFGBuilder B(Holder.F, Arcs, {{Insert, {"7", "23"}}}); + DominatorTree DT(*Holder.F); + EXPECT_TRUE(DT.verify()); + + B.applyUpdate(); + BasicBlock *From = B.getOrAddBlock("7"); + BasicBlock *To = B.getOrAddBlock("23"); + DT.insertEdge(From, To); + + EXPECT_TRUE(DT.verify()); +} + diff --git a/utils/release/test-release.sh b/utils/release/test-release.sh index 66a2c578083e..440dee53c1b7 100755 --- a/utils/release/test-release.sh +++ b/utils/release/test-release.sh @@ -33,6 +33,7 @@ do_asserts="no" do_compare="yes" do_rt="yes" do_libs="yes" +do_libcxxabi="yes" do_libunwind="yes" do_test_suite="yes" do_openmp="yes" @@ -62,6 +63,7 @@ function usage() { echo " For example -svn-path trunk or -svn-path branches/release_37" echo " -no-rt Disable check-out & build Compiler-RT" echo " -no-libs Disable check-out & build libcxx/libcxxabi/libunwind" + echo " -no-libcxxabi Disable check-out & build libcxxabi" echo " -no-libunwind Disable check-out & build libunwind" echo " -no-test-suite Disable check-out & build test-suite" echo " -no-openmp Disable check-out & build libomp" @@ -135,6 +137,9 @@ while [ $# -gt 0 ]; do -no-libs ) do_libs="no" ;; + -no-libcxxabi ) + do_libcxxabi="no" + ;; -no-libunwind ) 
do_libunwind="no" ;; @@ -206,7 +211,10 @@ if [ $do_rt = "yes" ]; then projects="$projects compiler-rt" fi if [ $do_libs = "yes" ]; then - projects="$projects libcxx libcxxabi" + projects="$projects libcxx" + if [ $do_libcxxabi = "yes" ]; then + projects="$projects libcxxabi" + fi if [ $do_libunwind = "yes" ]; then projects="$projects libunwind" fi From 520a89e9d38bb1c9cc2de3f875eae3ac69f9f08a Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 24 Jan 2018 20:25:37 +0000 Subject: [PATCH 2/6] Vendor import of clang release_60 branch r323338: https://llvm.org/svn/llvm-project/cfe/branches/release_60@323338 --- docs/OpenMPSupport.rst | 68 ++++++++++ docs/ReleaseNotes.rst | 18 ++- docs/index.rst | 1 + include/clang/Basic/Attr.td | 1 + include/clang/Basic/BuiltinsX86.def | 18 +-- include/clang/Basic/DiagnosticGroups.td | 4 +- include/clang/Basic/TokenKinds.def | 1 - .../StaticAnalyzer/Core/BugReporter/BugType.h | 38 ++++-- lib/AST/DeclBase.cpp | 2 + lib/AST/ODRHash.cpp | 2 + lib/CodeGen/CGBuiltin.cpp | 6 +- .../ObjectFilePCHContainerOperations.cpp | 5 + lib/Frontend/InitPreprocessor.cpp | 4 - lib/Lex/Lexer.cpp | 19 +-- lib/Lex/PPCaching.cpp | 4 +- lib/Lex/PPLexerChange.cpp | 1 + lib/Sema/Scope.cpp | 99 ++++++--------- lib/Sema/SemaTemplateDeduction.cpp | 4 + lib/Sema/SemaTemplateInstantiateDecl.cpp | 3 +- lib/StaticAnalyzer/Checkers/MallocChecker.cpp | 7 +- lib/StaticAnalyzer/Checkers/ValistChecker.cpp | 18 +-- test/Analysis/malloc.c | 7 -- test/CodeCompletion/Inputs/comments.h | 4 + test/CodeCompletion/comments.cpp | 13 ++ test/CodeGen/builtins-overflow.c | 8 +- test/CodeGenCXX/cxx1z-inline-variables.cpp | 8 ++ test/Lexer/null-character-in-literal.c | Bin 0 -> 917 bytes test/Modules/ExtDebugInfo.cpp | 2 +- test/Modules/Inputs/DebugCXX.h | 3 + test/Modules/Inputs/odr_hash-Friend/Box.h | 14 +++ test/Modules/Inputs/odr_hash-Friend/M1.h | 6 + test/Modules/Inputs/odr_hash-Friend/M2.h | 5 + test/Modules/Inputs/odr_hash-Friend/M3.h | 7 ++ 
.../Inputs/odr_hash-Friend/module.modulemap | 15 +++ test/Modules/ModuleDebugInfo.cpp | 17 ++- test/Modules/odr_hash-Friend.cpp | 22 ++++ test/Modules/odr_hash-blocks.cpp | 119 ++++++++++++++++++ test/Preprocessor/cuda-types.cu | 20 +-- test/Sema/_Float128.c | 22 ---- test/Sema/tautological-constant-compare.c | 4 +- ...xx1z-class-template-argument-deduction.cpp | 11 ++ test/SemaTemplate/alignas.cpp | 11 ++ test/SemaTemplate/cxx17-inline-variables.cpp | 11 ++ unittests/Lex/LexerTest.cpp | 2 + 44 files changed, 494 insertions(+), 160 deletions(-) create mode 100644 docs/OpenMPSupport.rst create mode 100644 test/CodeCompletion/Inputs/comments.h create mode 100644 test/CodeCompletion/comments.cpp create mode 100644 test/Lexer/null-character-in-literal.c create mode 100644 test/Modules/Inputs/odr_hash-Friend/Box.h create mode 100644 test/Modules/Inputs/odr_hash-Friend/M1.h create mode 100644 test/Modules/Inputs/odr_hash-Friend/M2.h create mode 100644 test/Modules/Inputs/odr_hash-Friend/M3.h create mode 100644 test/Modules/Inputs/odr_hash-Friend/module.modulemap create mode 100644 test/Modules/odr_hash-Friend.cpp create mode 100644 test/Modules/odr_hash-blocks.cpp delete mode 100644 test/Sema/_Float128.c diff --git a/docs/OpenMPSupport.rst b/docs/OpenMPSupport.rst new file mode 100644 index 000000000000..c121df3e2010 --- /dev/null +++ b/docs/OpenMPSupport.rst @@ -0,0 +1,68 @@ +.. raw:: html + + + +.. role:: none +.. role:: partial +.. role:: good + +================== +OpenMP Support +================== + +Clang fully supports OpenMP 3.1 + some elements of OpenMP 4.5. Clang supports offloading to X86_64, AArch64 and PPC64[LE] devices. +Support for Cuda devices is not ready yet. +The status of major OpenMP 4.5 features support in Clang. + +Standalone directives +===================== + +* #pragma omp [for] simd: :good:`Complete`. + +* #pragma omp declare simd: :partial:`Partial`. 
We support parsing/semantic + analysis + generation of special attributes for X86 target, but still + missing the LLVM pass for vectorization. + +* #pragma omp taskloop [simd]: :good:`Complete`. + +* #pragma omp target [enter|exit] data: :good:`Complete`. + +* #pragma omp target update: :good:`Complete`. + +* #pragma omp target: :partial:`Partial`. No support for the `depend` clauses. + +* #pragma omp declare target: :partial:`Partial`. No full codegen support. + +* #pragma omp teams: :good:`Complete`. + +* #pragma omp distribute [simd]: :good:`Complete`. + +* #pragma omp distribute parallel for [simd]: :good:`Complete`. + +Combined directives +=================== + +* #pragma omp parallel for simd: :good:`Complete`. + +* #pragma omp target parallel: :partial:`Partial`. No support for the `depend` clauses. + +* #pragma omp target parallel for [simd]: :partial:`Partial`. No support for the `depend` clauses. + +* #pragma omp target simd: :partial:`Partial`. No support for the `depend` clauses. + +* #pragma omp target teams: :partial:`Partial`. No support for the `depend` clauses. + +* #pragma omp teams distribute [simd]: :good:`Complete`. + +* #pragma omp target teams distribute [simd]: :partial:`Partial`. No support for the `depend` clauses. + +* #pragma omp teams distribute parallel for [simd]: :good:`Complete`. + +* #pragma omp target teams distribute parallel for [simd]: :partial:`Partial`. No full codegen support. + +Clang does not support any constructs/updates from upcoming OpenMP 5.0 except for `reduction`-based clauses in the `task` and `target`-based directives. +In addition, the LLVM OpenMP runtime `libomp` supports the OpenMP Tools Interface (OMPT) on x86, x86_64, AArch64, and PPC64 on Linux, Windows, and macOS. 
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index 92aa01c4b1ef..6857903c96d9 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -163,6 +163,15 @@ Attribute Changes in Clang - The presence of __attribute__((availability(...))) on a declaration no longer implies default visibility for that declaration on macOS. +- Clang now supports configuration files. These are collections of driver + options, which can be applied by specifying the configuration file, either + using command line option `--config foo.cfg` or encoding it into executable + name `foo-clang`. Clang behaves as if the options from this file were inserted + before the options specified in command line. This feature is primarily intended + to facilitate cross compilation. Details can be found in + `Clang Compiler User's Manual + `. + - ... Windows Support @@ -209,7 +218,7 @@ OpenCL C Language Changes in Clang OpenMP Support in Clang ---------------------------------- -- Added options `-f[no]-openmp-simd` that support code emission only foe OpenMP +- Added options `-f[no]-openmp-simd` that support code emission only for OpenMP SIMD-based directives, like `#pragma omp simd`, `#pragma omp parallel for simd` etc. The code is emitted only for simd-based part of the combined directives and clauses. @@ -222,6 +231,13 @@ OpenMP Support in Clang - Added support for `reduction`-based clauses on `task`-based directives from upcoming OpenMP 5.0. +- The LLVM OpenMP runtime `libomp` now supports the OpenMP Tools Interface (OMPT) + on x86, x86_64, AArch64, and PPC64 on Linux, Windows, and macOS. If you observe + a measurable performance impact on one of your applications without a tool + attached, please rebuild the runtime library with `-DLIBOMP_OMPT_SUPPORT=OFF` and + file a bug at `LLVM's Bugzilla `_ or send a message to the + `OpenMP development list `_. 
+ Internal API Changes -------------------- diff --git a/docs/index.rst b/docs/index.rst index 342ab74d2d80..ed479534ee97 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -39,6 +39,7 @@ Using Clang as a Compiler SourceBasedCodeCoverage Modules MSVCCompatibility + OpenMPSupport ThinLTO CommandGuide/index FAQ diff --git a/include/clang/Basic/Attr.td b/include/clang/Basic/Attr.td index 8b84c4b8b50d..59f595e03c34 100644 --- a/include/clang/Basic/Attr.td +++ b/include/clang/Basic/Attr.td @@ -549,6 +549,7 @@ def Aligned : InheritableAttr { Keyword<"_Alignas">]>, Accessor<"isDeclspec",[Declspec<"align">]>]; let Documentation = [Undocumented]; + let DuplicatesAllowedWhileMerging = 1; } def AlignValue : Attr { diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index 465551be7742..9169b7c22920 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -1357,15 +1357,15 @@ TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_maskz, "V8sV8sV8sV8sUc", "", "avx512vl TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_maskz, "V16sV16sV16sV16sUs", "", "avx512vl,avx512vbmi2") TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_maskz, "V32sV32sV32sV32sUi", "", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdd128_mask, "V4iV4iV4iiV4iUc", "", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdd256_mask, "V8iV8iV8iiV8iUc", "", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdd512_mask, "V16iV16iV16iiV16iUs", "", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdq128_mask, "V2LLiV2LLiV2LLiiV2LLiUc", "", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdq256_mask, "V4LLiV4LLiV4LLiiV4LLiUc", "", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdq512_mask, "V8LLiV8LLiV8LLiiV8LLiUc", "", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdw128_mask, "V8sV8sV8siV8sUc", "", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdw256_mask, "V16sV16sV16siV16sUs", "", "avx512vl,avx512vbmi2") 
-TARGET_BUILTIN(__builtin_ia32_vpshrdw512_mask, "V32sV32sV32siV32sUi", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdd128_mask, "V4iV4iV4iIiV4iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdd256_mask, "V8iV8iV8iIiV8iUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdd512_mask, "V16iV16iV16iIiV16iUs", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq512_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", "", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw128_mask, "V8sV8sV8sIiV8sUc", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw256_mask, "V16sV16sV16sIiV16sUs", "", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw512_mask, "V32sV32sV32sIiV32sUi", "", "avx512vbmi2") TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "", "avx512bw") diff --git a/include/clang/Basic/DiagnosticGroups.td b/include/clang/Basic/DiagnosticGroups.td index 79b2766ae16f..b54f07508867 100644 --- a/include/clang/Basic/DiagnosticGroups.td +++ b/include/clang/Basic/DiagnosticGroups.td @@ -444,8 +444,7 @@ def TautologicalInRangeCompare : DiagGroup<"tautological-constant-in-range-compa TautologicalUnsignedEnumZeroCompare]>; def TautologicalOutOfRangeCompare : DiagGroup<"tautological-constant-out-of-range-compare">; def TautologicalConstantCompare : DiagGroup<"tautological-constant-compare", - [TautologicalInRangeCompare, - TautologicalOutOfRangeCompare]>; + [TautologicalOutOfRangeCompare]>; def TautologicalPointerCompare : DiagGroup<"tautological-pointer-compare">; def TautologicalOverlapCompare : DiagGroup<"tautological-overlap-compare">; def TautologicalUndefinedCompare : DiagGroup<"tautological-undefined-compare">; @@ 
-719,7 +718,6 @@ def IntToPointerCast : DiagGroup<"int-to-pointer-cast", def Move : DiagGroup<"move", [PessimizingMove, RedundantMove, SelfMove]>; def Extra : DiagGroup<"extra", [ - TautologicalInRangeCompare, MissingFieldInitializers, IgnoredQualifiers, InitializerOverrides, diff --git a/include/clang/Basic/TokenKinds.def b/include/clang/Basic/TokenKinds.def index 6ae8821a834d..91c13244f9a0 100644 --- a/include/clang/Basic/TokenKinds.def +++ b/include/clang/Basic/TokenKinds.def @@ -398,7 +398,6 @@ TYPE_TRAIT_2(__builtin_types_compatible_p, TypeCompatible, KEYNOCXX) KEYWORD(__builtin_va_arg , KEYALL) KEYWORD(__extension__ , KEYALL) KEYWORD(__float128 , KEYALL) -ALIAS("_Float128", __float128 , KEYNOCXX) KEYWORD(__imag , KEYALL) KEYWORD(__int128 , KEYALL) KEYWORD(__label__ , KEYALL) diff --git a/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h b/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h index 18fa85c9657f..d3163ef3e576 100644 --- a/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h +++ b/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h @@ -32,27 +32,39 @@ class BugType { const CheckName Check; const std::string Name; const std::string Category; - bool SuppressonSink; + const CheckerBase *Checker; + bool SuppressOnSink; virtual void anchor(); -public: - BugType(class CheckName check, StringRef name, StringRef cat) - : Check(check), Name(name), Category(cat), SuppressonSink(false) {} - BugType(const CheckerBase *checker, StringRef name, StringRef cat) - : Check(checker->getCheckName()), Name(name), Category(cat), - SuppressonSink(false) {} - virtual ~BugType() {} - // FIXME: Should these be made strings as well? 
+public: + BugType(CheckName Check, StringRef Name, StringRef Cat) + : Check(Check), Name(Name), Category(Cat), Checker(nullptr), + SuppressOnSink(false) {} + BugType(const CheckerBase *Checker, StringRef Name, StringRef Cat) + : Check(Checker->getCheckName()), Name(Name), Category(Cat), + Checker(Checker), SuppressOnSink(false) {} + virtual ~BugType() = default; + StringRef getName() const { return Name; } StringRef getCategory() const { return Category; } - StringRef getCheckName() const { return Check.getName(); } + StringRef getCheckName() const { + // FIXME: This is a workaround to ensure that the correct check name is used + // The check names are set after the constructors are run. + // In case the BugType object is initialized in the checker's ctor + // the Check field will be empty. To circumvent this problem we use + // CheckerBase whenever it is possible. + StringRef CheckName = + Checker ? Checker->getCheckName().getName() : Check.getName(); + assert(!CheckName.empty() && "Check name is not set properly."); + return CheckName; + } /// isSuppressOnSink - Returns true if bug reports associated with this bug /// type should be suppressed if the end node of the report is post-dominated /// by a sink node. - bool isSuppressOnSink() const { return SuppressonSink; } - void setSuppressOnSink(bool x) { SuppressonSink = x; } + bool isSuppressOnSink() const { return SuppressOnSink; } + void setSuppressOnSink(bool x) { SuppressOnSink = x; } virtual void FlushReports(BugReporter& BR); }; @@ -74,7 +86,7 @@ class BuiltinBug : public BugType { StringRef getDescription() const { return desc; } }; -} // end GR namespace +} // end ento namespace } // end clang namespace #endif diff --git a/lib/AST/DeclBase.cpp b/lib/AST/DeclBase.cpp index 29ce7ae034b5..2cdcdae9ab02 100644 --- a/lib/AST/DeclBase.cpp +++ b/lib/AST/DeclBase.cpp @@ -891,12 +891,14 @@ bool Decl::AccessDeclContextSanity() const { // 4. the context is not a record // 5. it's invalid // 6. 
it's a C++0x static_assert. + // 7. it's a block literal declaration if (isa(this) || isa(this) || isa(this) || !isa(getDeclContext()) || isInvalidDecl() || isa(this) || + isa(this) || // FIXME: a ParmVarDecl can have ClassTemplateSpecialization // as DeclContext (?). isa(this) || diff --git a/lib/AST/ODRHash.cpp b/lib/AST/ODRHash.cpp index 088d8bedd453..38e8d34135f9 100644 --- a/lib/AST/ODRHash.cpp +++ b/lib/AST/ODRHash.cpp @@ -478,6 +478,8 @@ void ODRHash::AddFunctionDecl(const FunctionDecl *Function) { // TODO: Fix hashing for class methods. if (isa(Function)) return; + // And friend functions. + if (Function->getFriendObjectKind()) return; // Skip functions that are specializations or in specialization context. const DeclContext *DC = Function; diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index ba54f8342f1b..35ae114c4f25 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -915,7 +915,11 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow); } - Result = CGF.Builder.CreateTrunc(UnsignedResult, ResTy); + // Negate the product if it would be negative in infinite precision. 
+ Result = CGF.Builder.CreateSelect( + IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult); + + Result = CGF.Builder.CreateTrunc(Result, ResTy); } assert(Overflow && Result && "Missing overflow or result"); diff --git a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index d0760b9cc2a6..0fe9f5da07c8 100644 --- a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -229,6 +229,11 @@ class PCHContainerGenerator : public ASTConsumer { Builder->getModuleDebugInfo()->completeRequiredType(RD); } + void HandleImplicitImportDecl(ImportDecl *D) override { + if (!D->getImportedOwningModule()) + Builder->getModuleDebugInfo()->EmitImportDecl(*D); + } + /// Emit a container holding the serialized AST. void HandleTranslationUnit(ASTContext &Ctx) override { assert(M && VMContext && Builder); diff --git a/lib/Frontend/InitPreprocessor.cpp b/lib/Frontend/InitPreprocessor.cpp index 639050f7c64b..321d963827d1 100644 --- a/lib/Frontend/InitPreprocessor.cpp +++ b/lib/Frontend/InitPreprocessor.cpp @@ -817,10 +817,6 @@ static void InitializePredefinedMacros(const TargetInfo &TI, DefineFloatMacros(Builder, "FLT", &TI.getFloatFormat(), "F"); DefineFloatMacros(Builder, "DBL", &TI.getDoubleFormat(), ""); DefineFloatMacros(Builder, "LDBL", &TI.getLongDoubleFormat(), "L"); - if (TI.hasFloat128Type()) - // FIXME: Switch away from the non-standard "Q" when we can - DefineFloatMacros(Builder, "FLT128", &TI.getFloat128Format(), "Q"); - // Define a __POINTER_WIDTH__ macro for stdint.h. 
Builder.defineMacro("__POINTER_WIDTH__", diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 830354ab23f0..8bd4ab0ff9ca 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -2009,18 +2009,21 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { const char *AfterLessPos = CurPtr; char C = getAndAdvanceChar(CurPtr, Result); while (C != '>') { - // Skip escaped characters. - if (C == '\\' && CurPtr < BufferEnd) { - // Skip the escaped character. - getAndAdvanceChar(CurPtr, Result); - } else if (C == '\n' || C == '\r' || // Newline. - (C == 0 && (CurPtr-1 == BufferEnd || // End of file. - isCodeCompletionPoint(CurPtr-1)))) { + // Skip escaped characters. Escaped newlines will already be processed by + // getAndAdvanceChar. + if (C == '\\') + C = getAndAdvanceChar(CurPtr, Result); + + if (C == '\n' || C == '\r' || // Newline. + (C == 0 && (CurPtr-1 == BufferEnd || // End of file. + isCodeCompletionPoint(CurPtr-1)))) { // If the filename is unterminated, then it must just be a lone < // character. Return this as such. 
FormTokenWithChars(Result, AfterLessPos, tok::less); return true; - } else if (C == 0) { + } + + if (C == 0) { NulCharacter = CurPtr-1; } C = getAndAdvanceChar(CurPtr, Result); diff --git a/lib/Lex/PPCaching.cpp b/lib/Lex/PPCaching.cpp index f5e8cdc25d38..9758557d7b44 100644 --- a/lib/Lex/PPCaching.cpp +++ b/lib/Lex/PPCaching.cpp @@ -105,8 +105,10 @@ void Preprocessor::CachingLex(Token &Result) { } void Preprocessor::EnterCachingLexMode() { - if (InCachingLexMode()) + if (InCachingLexMode()) { + assert(CurLexerKind == CLK_CachingLexer && "Unexpected lexer kind"); return; + } PushIncludeMacroStack(); CurLexerKind = CLK_CachingLexer; diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp index e484e9c4c3a3..f21787338b37 100644 --- a/lib/Lex/PPLexerChange.cpp +++ b/lib/Lex/PPLexerChange.cpp @@ -444,6 +444,7 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { } CurPPLexer = nullptr; + recomputeCurLexerKind(); return true; } diff --git a/lib/Sema/Scope.cpp b/lib/Sema/Scope.cpp index ae5b181c6728..eae5a328bfa2 100644 --- a/lib/Sema/Scope.cpp +++ b/lib/Sema/Scope.cpp @@ -143,72 +143,43 @@ void Scope::dumpImpl(raw_ostream &OS) const { if (HasFlags) OS << "Flags: "; - while (Flags) { - if (Flags & FnScope) { - OS << "FnScope"; - Flags &= ~FnScope; - } else if (Flags & BreakScope) { - OS << "BreakScope"; - Flags &= ~BreakScope; - } else if (Flags & ContinueScope) { - OS << "ContinueScope"; - Flags &= ~ContinueScope; - } else if (Flags & DeclScope) { - OS << "DeclScope"; - Flags &= ~DeclScope; - } else if (Flags & ControlScope) { - OS << "ControlScope"; - Flags &= ~ControlScope; - } else if (Flags & ClassScope) { - OS << "ClassScope"; - Flags &= ~ClassScope; - } else if (Flags & BlockScope) { - OS << "BlockScope"; - Flags &= ~BlockScope; - } else if (Flags & TemplateParamScope) { - OS << "TemplateParamScope"; - Flags &= ~TemplateParamScope; - } else if (Flags & FunctionPrototypeScope) { - OS << "FunctionPrototypeScope"; - Flags &= 
~FunctionPrototypeScope; - } else if (Flags & FunctionDeclarationScope) { - OS << "FunctionDeclarationScope"; - Flags &= ~FunctionDeclarationScope; - } else if (Flags & AtCatchScope) { - OS << "AtCatchScope"; - Flags &= ~AtCatchScope; - } else if (Flags & ObjCMethodScope) { - OS << "ObjCMethodScope"; - Flags &= ~ObjCMethodScope; - } else if (Flags & SwitchScope) { - OS << "SwitchScope"; - Flags &= ~SwitchScope; - } else if (Flags & TryScope) { - OS << "TryScope"; - Flags &= ~TryScope; - } else if (Flags & FnTryCatchScope) { - OS << "FnTryCatchScope"; - Flags &= ~FnTryCatchScope; - } else if (Flags & SEHTryScope) { - OS << "SEHTryScope"; - Flags &= ~SEHTryScope; - } else if (Flags & SEHExceptScope) { - OS << "SEHExceptScope"; - Flags &= ~SEHExceptScope; - } else if (Flags & OpenMPDirectiveScope) { - OS << "OpenMPDirectiveScope"; - Flags &= ~OpenMPDirectiveScope; - } else if (Flags & OpenMPLoopDirectiveScope) { - OS << "OpenMPLoopDirectiveScope"; - Flags &= ~OpenMPLoopDirectiveScope; - } else if (Flags & OpenMPSimdDirectiveScope) { - OS << "OpenMPSimdDirectiveScope"; - Flags &= ~OpenMPSimdDirectiveScope; - } + std::pair FlagInfo[] = { + {FnScope, "FnScope"}, + {BreakScope, "BreakScope"}, + {ContinueScope, "ContinueScope"}, + {DeclScope, "DeclScope"}, + {ControlScope, "ControlScope"}, + {ClassScope, "ClassScope"}, + {BlockScope, "BlockScope"}, + {TemplateParamScope, "TemplateParamScope"}, + {FunctionPrototypeScope, "FunctionPrototypeScope"}, + {FunctionDeclarationScope, "FunctionDeclarationScope"}, + {AtCatchScope, "AtCatchScope"}, + {ObjCMethodScope, "ObjCMethodScope"}, + {SwitchScope, "SwitchScope"}, + {TryScope, "TryScope"}, + {FnTryCatchScope, "FnTryCatchScope"}, + {OpenMPDirectiveScope, "OpenMPDirectiveScope"}, + {OpenMPLoopDirectiveScope, "OpenMPLoopDirectiveScope"}, + {OpenMPSimdDirectiveScope, "OpenMPSimdDirectiveScope"}, + {EnumScope, "EnumScope"}, + {SEHTryScope, "SEHTryScope"}, + {SEHExceptScope, "SEHExceptScope"}, + {SEHFilterScope, "SEHFilterScope"}, + 
{CompoundStmtScope, "CompoundStmtScope"}, + {ClassInheritanceScope, "ClassInheritanceScope"}}; - if (Flags) - OS << " | "; + for (auto Info : FlagInfo) { + if (Flags & Info.first) { + OS << Info.second; + Flags &= ~Info.first; + if (Flags) + OS << " | "; + } } + + assert(Flags == 0 && "Unknown scope flags"); + if (HasFlags) OS << '\n'; diff --git a/lib/Sema/SemaTemplateDeduction.cpp b/lib/Sema/SemaTemplateDeduction.cpp index 3a0f2ff0004b..d09cf9933ecf 100644 --- a/lib/Sema/SemaTemplateDeduction.cpp +++ b/lib/Sema/SemaTemplateDeduction.cpp @@ -502,6 +502,10 @@ DeduceTemplateArguments(Sema &S, SmallVectorImpl &Deduced) { assert(Arg.isCanonical() && "Argument type must be canonical"); + // Treat an injected-class-name as its underlying template-id. + if (auto *Injected = dyn_cast(Arg)) + Arg = Injected->getInjectedSpecializationType(); + // Check whether the template argument is a dependent template-id. if (const TemplateSpecializationType *SpecArg = dyn_cast(Arg)) { diff --git a/lib/Sema/SemaTemplateInstantiateDecl.cpp b/lib/Sema/SemaTemplateInstantiateDecl.cpp index ab68e7e671de..9163fbc6f7e8 100644 --- a/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -4160,7 +4160,8 @@ void Sema::BuildVariableInstantiation( // it right away if the type contains 'auto'. 
if ((!isa(NewVar) && !InstantiatingVarTemplate && - !(OldVar->isInline() && OldVar->isThisDeclarationADefinition())) || + !(OldVar->isInline() && OldVar->isThisDeclarationADefinition() && + !NewVar->isThisDeclarationADefinition())) || NewVar->getType()->isUndeducedType()) InstantiateVariableInitializer(NewVar, OldVar, TemplateArgs); diff --git a/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/lib/StaticAnalyzer/Checkers/MallocChecker.cpp index 851114004b96..904c9ffa37df 100644 --- a/lib/StaticAnalyzer/Checkers/MallocChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -2900,8 +2900,13 @@ void ento::registerNewDeleteLeaksChecker(CheckerManager &mgr) { mgr.getCurrentCheckName(); // We currently treat NewDeleteLeaks checker as a subchecker of NewDelete // checker. - if (!checker->ChecksEnabled[MallocChecker::CK_NewDeleteChecker]) + if (!checker->ChecksEnabled[MallocChecker::CK_NewDeleteChecker]) { checker->ChecksEnabled[MallocChecker::CK_NewDeleteChecker] = true; + // FIXME: This does not set the correct name, but without this workaround + // no name will be set at all. 
+ checker->CheckNames[MallocChecker::CK_NewDeleteChecker] = + mgr.getCurrentCheckName(); + } } #define REGISTER_CHECKER(name) \ diff --git a/lib/StaticAnalyzer/Checkers/ValistChecker.cpp b/lib/StaticAnalyzer/Checkers/ValistChecker.cpp index 06c4ef71d80b..1ebac2118a42 100644 --- a/lib/StaticAnalyzer/Checkers/ValistChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/ValistChecker.cpp @@ -64,7 +64,7 @@ class ValistChecker : public Checker, CheckerContext &C) const; void reportLeakedVALists(const RegionVector &LeakedVALists, StringRef Msg1, StringRef Msg2, CheckerContext &C, ExplodedNode *N, - bool ForceReport = false) const; + bool ReportUninit = false) const; void checkVAListStartCall(const CallEvent &Call, CheckerContext &C, bool IsCopy) const; @@ -267,15 +267,19 @@ void ValistChecker::reportUninitializedAccess(const MemRegion *VAList, void ValistChecker::reportLeakedVALists(const RegionVector &LeakedVALists, StringRef Msg1, StringRef Msg2, CheckerContext &C, ExplodedNode *N, - bool ForceReport) const { + bool ReportUninit) const { if (!(ChecksEnabled[CK_Unterminated] || - (ChecksEnabled[CK_Uninitialized] && ForceReport))) + (ChecksEnabled[CK_Uninitialized] && ReportUninit))) return; for (auto Reg : LeakedVALists) { if (!BT_leakedvalist) { - BT_leakedvalist.reset(new BugType(CheckNames[CK_Unterminated], - "Leaked va_list", - categories::MemoryError)); + // FIXME: maybe creating a new check name for this type of bug is a better + // solution. + BT_leakedvalist.reset( + new BugType(CheckNames[CK_Unterminated].getName().empty() + ? 
CheckNames[CK_Uninitialized] + : CheckNames[CK_Unterminated], + "Leaked va_list", categories::MemoryError)); BT_leakedvalist->setSuppressOnSink(true); } @@ -375,7 +379,7 @@ void ValistChecker::checkVAListEndCall(const CallEvent &Call, std::shared_ptr ValistChecker::ValistBugVisitor::VisitNode( const ExplodedNode *N, const ExplodedNode *PrevN, BugReporterContext &BRC, - BugReport &BR) { + BugReport &) { ProgramStateRef State = N->getState(); ProgramStateRef StatePrev = PrevN->getState(); diff --git a/test/Analysis/malloc.c b/test/Analysis/malloc.c index 4c364ebd9a2f..e08ec1b76cff 100644 --- a/test/Analysis/malloc.c +++ b/test/Analysis/malloc.c @@ -1720,13 +1720,6 @@ void *smallocWarn(size_t size) { } } -char *dupstrWarn(const char *s) { - const int len = strlen(s); - char *p = (char*) smallocWarn(len + 1); - strcpy(p, s); // expected-warning{{String copy function overflows destination buffer}} - return p; -} - int *radar15580979() { int *data = (int *)malloc(32); int *p = data ?: (int*)malloc(32); // no warning diff --git a/test/CodeCompletion/Inputs/comments.h b/test/CodeCompletion/Inputs/comments.h new file mode 100644 index 000000000000..7b4b5daa0bd5 --- /dev/null +++ b/test/CodeCompletion/Inputs/comments.h @@ -0,0 +1,4 @@ +// PR32732 +struct B { + // <- code completion +}; diff --git a/test/CodeCompletion/comments.cpp b/test/CodeCompletion/comments.cpp new file mode 100644 index 000000000000..21f1465ebc04 --- /dev/null +++ b/test/CodeCompletion/comments.cpp @@ -0,0 +1,13 @@ +// Note: the run lines follow their respective tests, since line/column +// matter in this test. 
+ +#include "comments.h" + +struct A { + // <- code completion + /* <- code completion */ +}; + +// RUN: %clang_cc1 -I %S/Inputs -fsyntax-only -code-completion-at=%s:7:6 %s +// RUN: %clang_cc1 -I %S/Inputs -fsyntax-only -code-completion-at=%s:8:6 %s +// RUN: %clang_cc1 -I %S/Inputs -fsyntax-only -code-completion-at=%S/Inputs/comments.h:3:6 %s diff --git a/test/CodeGen/builtins-overflow.c b/test/CodeGen/builtins-overflow.c index 7a30cfbd46ee..f83bbfb9672d 100644 --- a/test/CodeGen/builtins-overflow.c +++ b/test/CodeGen/builtins-overflow.c @@ -373,7 +373,9 @@ int test_mixed_sign_mull_overflow_unsigned(int x, unsigned y) { // CHECK-NEXT: [[NotNull:%.*]] = icmp ne i32 [[UnsignedResult]], 0 // CHECK-NEXT: [[Underflow:%.*]] = and i1 [[IsNeg]], [[NotNull]] // CHECK-NEXT: [[OFlow:%.*]] = or i1 [[UnsignedOFlow]], [[Underflow]] -// CHECK-NEXT: store i32 [[UnsignedResult]], i32* %{{.*}}, align 4 +// CHECK-NEXT: [[NegatedResult:%.*]] = sub i32 0, [[UnsignedResult]] +// CHECK-NEXT: [[Result:%.*]] = select i1 [[IsNeg]], i32 [[NegatedResult]], i32 [[UnsignedResult]] +// CHECK-NEXT: store i32 [[Result]], i32* %{{.*}}, align 4 // CHECK: br i1 [[OFlow]] unsigned result; @@ -432,7 +434,9 @@ long long test_mixed_sign_mulll_overflow_trunc_unsigned(long long x, unsigned lo // CHECK-NEXT: [[OVERFLOW_PRE_TRUNC:%.*]] = or i1 {{.*}}, [[UNDERFLOW]] // CHECK-NEXT: [[TRUNC_OVERFLOW:%.*]] = icmp ugt i64 [[UNSIGNED_RESULT]], 4294967295 // CHECK-NEXT: [[OVERFLOW:%.*]] = or i1 [[OVERFLOW_PRE_TRUNC]], [[TRUNC_OVERFLOW]] -// CHECK-NEXT: trunc i64 [[UNSIGNED_RESULT]] to i32 +// CHECK-NEXT: [[NEGATED:%.*]] = sub i64 0, [[UNSIGNED_RESULT]] +// CHECK-NEXT: [[RESULT:%.*]] = select i1 {{.*}}, i64 [[NEGATED]], i64 [[UNSIGNED_RESULT]] +// CHECK-NEXT: trunc i64 [[RESULT]] to i32 // CHECK-NEXT: store unsigned result; if (__builtin_mul_overflow(y, x, &result)) diff --git a/test/CodeGenCXX/cxx1z-inline-variables.cpp b/test/CodeGenCXX/cxx1z-inline-variables.cpp index 2d16acd8a8c2..50eab3b70611 100644 --- 
a/test/CodeGenCXX/cxx1z-inline-variables.cpp +++ b/test/CodeGenCXX/cxx1z-inline-variables.cpp @@ -58,14 +58,22 @@ template struct X { static int a; static inline int b; static int c; + static const int d; + static int e; }; // CHECK: @_ZN1XIiE1aE = linkonce_odr global i32 10 // CHECK: @_ZN1XIiE1bE = global i32 20 // CHECK-NOT: @_ZN1XIiE1cE +// CHECK: @_ZN1XIiE1dE = linkonce_odr constant i32 40 +// CHECK: @_ZN1XIiE1eE = linkonce_odr global i32 50 template<> inline int X::a = 10; int &use3 = X::a; template<> int X::b = 20; template<> inline int X::c = 30; +template constexpr int X::d = 40; +template inline int X::e = 50; +const int *use_x_int_d = &X::d; +const int *use_x_int_e = &X::e; template struct Y; template<> struct Y { diff --git a/test/Lexer/null-character-in-literal.c b/test/Lexer/null-character-in-literal.c new file mode 100644 index 0000000000000000000000000000000000000000..a479547536762e96405e1866f3a15b7429ca987e GIT binary patch literal 917 zcmcIj%TB{E5bQZ$u_~$p6=~>|m-qsN5Zo#u$Jw-2-F5JzZB+H&S(66!!KnhD?6r4x zc6OZS@cRCI3j?VIO+Ta@Lsq$lyjW+3bOYHEt*ROrxFpGQc$+B)mC^{(@FPVIwDM?$ z1`1k(oCzx=2i!Fj{76`=0^b=A-hjo0St9ruE+UwaQkBRsS~sI4iMb%)0n%Q22Yip~ z8X2q1R>G2^DQq@}MK0YH)D|7uC6=J*yL?AzyKNDRh&cHxNH|!(kbKI<%rnrO%!&Y= z6g7gw&wwP6iL7r@X4X<0O6Jy&J@tNs>z~=W^EOJfn?~3CuJ=yI+Pfv%f11ml8q%ra2)dBLLCj@(DAltW%jW1iyI{uUHW}gc~67K^9**P e@=@1&$9)0NT4P&hq(?;3_B+KNQkg&{Pwop|mop;( literal 0 HcmV?d00001 diff --git a/test/Modules/ExtDebugInfo.cpp b/test/Modules/ExtDebugInfo.cpp index 97386bc4d007..c57f1f034eb2 100644 --- a/test/Modules/ExtDebugInfo.cpp +++ b/test/Modules/ExtDebugInfo.cpp @@ -187,7 +187,7 @@ void foo() { // CHECK: !DIGlobalVariable(name: "anon_enum", {{.*}}, type: ![[ANON_ENUM:[0-9]+]] // CHECK: !DICompositeType(tag: DW_TAG_enumeration_type, scope: ![[NS]], -// CHECK-SAME: line: 16 +// CHECK-SAME: line: 19 // CHECK: !DIGlobalVariable(name: "GlobalUnion", // CHECK-SAME: type: ![[GLOBAL_UNION:[0-9]+]] diff --git a/test/Modules/Inputs/DebugCXX.h 
b/test/Modules/Inputs/DebugCXX.h index 1ccf8d302f13..8f83c0bc69db 100644 --- a/test/Modules/Inputs/DebugCXX.h +++ b/test/Modules/Inputs/DebugCXX.h @@ -1,4 +1,7 @@ /* -*- C++ -*- */ + +#include "dummy.h" + namespace DebugCXX { // Records. struct Struct { diff --git a/test/Modules/Inputs/odr_hash-Friend/Box.h b/test/Modules/Inputs/odr_hash-Friend/Box.h new file mode 100644 index 000000000000..01ab90d601c2 --- /dev/null +++ b/test/Modules/Inputs/odr_hash-Friend/Box.h @@ -0,0 +1,14 @@ +template +struct iterator { + void Compare(const iterator &x) { } + friend void Check(iterator) {} +}; + +template struct Box { + iterator I; + + void test() { + Check(I); + I.Compare(I); + } +}; diff --git a/test/Modules/Inputs/odr_hash-Friend/M1.h b/test/Modules/Inputs/odr_hash-Friend/M1.h new file mode 100644 index 000000000000..202ad06c3488 --- /dev/null +++ b/test/Modules/Inputs/odr_hash-Friend/M1.h @@ -0,0 +1,6 @@ +#include "Box.h" + +void Peek() { + Box<> Gift; + Gift.test(); +} diff --git a/test/Modules/Inputs/odr_hash-Friend/M2.h b/test/Modules/Inputs/odr_hash-Friend/M2.h new file mode 100644 index 000000000000..69f08a957ede --- /dev/null +++ b/test/Modules/Inputs/odr_hash-Friend/M2.h @@ -0,0 +1,5 @@ +#include "Box.h" +void x() { + Box<> Unused; + //Unused.test(); +} diff --git a/test/Modules/Inputs/odr_hash-Friend/M3.h b/test/Modules/Inputs/odr_hash-Friend/M3.h new file mode 100644 index 000000000000..ab457e0c08f2 --- /dev/null +++ b/test/Modules/Inputs/odr_hash-Friend/M3.h @@ -0,0 +1,7 @@ +#include "Box.h" +#include "M2.h" + +void Party() { + Box<> Present; + Present.test(); +} diff --git a/test/Modules/Inputs/odr_hash-Friend/module.modulemap b/test/Modules/Inputs/odr_hash-Friend/module.modulemap new file mode 100644 index 000000000000..28e1832e30e9 --- /dev/null +++ b/test/Modules/Inputs/odr_hash-Friend/module.modulemap @@ -0,0 +1,15 @@ +module Box { + header "Box.h" +} + +module Module1 { + header "M1.h" +} + +module Module2 { + header "M2.h" +} + +module Module3 { + header 
"M3.h" +} diff --git a/test/Modules/ModuleDebugInfo.cpp b/test/Modules/ModuleDebugInfo.cpp index 008b3e4f2bab..f0d883767045 100644 --- a/test/Modules/ModuleDebugInfo.cpp +++ b/test/Modules/ModuleDebugInfo.cpp @@ -5,12 +5,13 @@ // Modules: // RUN: rm -rf %t -// RUN: %clang_cc1 -triple %itanium_abi_triple -x objective-c++ -std=c++11 -debug-info-kind=limited -fmodules -fmodule-format=obj -fimplicit-module-maps -DMODULES -fmodules-cache-path=%t %s -I %S/Inputs -I %t -emit-llvm -o %t.ll -mllvm -debug-only=pchcontainer &>%t-mod.ll +// RUN: %clang_cc1 -triple %itanium_abi_triple -x objective-c++ -std=c++11 -debugger-tuning=lldb -debug-info-kind=limited -fmodules -fmodule-format=obj -fimplicit-module-maps -DMODULES -fmodules-cache-path=%t %s -I %S/Inputs -I %t -emit-llvm -o %t.ll -mllvm -debug-only=pchcontainer &>%t-mod.ll // RUN: cat %t-mod.ll | FileCheck %s // RUN: cat %t-mod.ll | FileCheck --check-prefix=CHECK-NEG %s +// RUN: cat %t-mod.ll | FileCheck --check-prefix=CHECK-MOD %s // PCH: -// RUN: %clang_cc1 -triple %itanium_abi_triple -x c++ -std=c++11 -emit-pch -fmodule-format=obj -I %S/Inputs -o %t.pch %S/Inputs/DebugCXX.h -mllvm -debug-only=pchcontainer &>%t-pch.ll +// RUN: %clang_cc1 -triple %itanium_abi_triple -x c++ -std=c++11 -debugger-tuning=lldb -emit-pch -fmodule-format=obj -I %S/Inputs -o %t.pch %S/Inputs/DebugCXX.h -mllvm -debug-only=pchcontainer &>%t-pch.ll // RUN: cat %t-pch.ll | FileCheck %s // RUN: cat %t-pch.ll | FileCheck --check-prefix=CHECK-NEG %s @@ -18,6 +19,9 @@ @import DebugCXX; #endif +// CHECK-MOD: distinct !DICompileUnit(language: DW_LANG_{{.*}}C_plus_plus, +// CHECK-MOD: distinct !DICompileUnit(language: DW_LANG_{{.*}}C_plus_plus, + // CHECK: distinct !DICompileUnit(language: DW_LANG_{{.*}}C_plus_plus, // CHECK-SAME: isOptimized: false, // CHECK-NOT: splitDebugFilename: @@ -27,6 +31,8 @@ // CHECK-SAME: identifier: "_ZTSN8DebugCXX4EnumE") // CHECK: !DINamespace(name: "DebugCXX" +// CHECK-MOD: ![[DEBUGCXX:.*]] = !DIModule(scope: null, name: 
"DebugCXX + // CHECK: !DICompositeType(tag: DW_TAG_enumeration_type, // CHECK-NOT: name: // CHECK-SAME: ) @@ -150,4 +156,11 @@ // CHECK-SAME: name: "WithSpecializedBase", // CHECK-SAME: flags: DIFlagFwdDecl, +// CHECK-MOD: !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: ![[DEBUGCXX]], +// CHECK-MOD-SAME: entity: ![[DUMMY:[0-9]+]], +// CHECK-MOD-SAME: line: 3) +// CHECK-MOD: ![[DUMMY]] = !DIModule(scope: null, name: "dummy", +// CHECK-MOD: distinct !DICompileUnit(language: DW_LANG_ObjC_plus_plus, +// CHECK-MOD-SAME: splitDebugFilename: "{{.*}}dummy{{.*}}.pcm", + // CHECK-NEG-NOT: !DICompositeType(tag: DW_TAG_structure_type, name: "PureForwardDecl" diff --git a/test/Modules/odr_hash-Friend.cpp b/test/Modules/odr_hash-Friend.cpp new file mode 100644 index 000000000000..39c0c4b762c2 --- /dev/null +++ b/test/Modules/odr_hash-Friend.cpp @@ -0,0 +1,22 @@ +// RUN: rm -rf %t + +// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/modules.cache \ +// RUN: -I %S/Inputs/odr_hash-Friend \ +// RUN: -emit-obj -o /dev/null \ +// RUN: -fmodules \ +// RUN: -fimplicit-module-maps \ +// RUN: -fmodules-cache-path=%t/modules.cache \ +// RUN: -std=c++11 -x c++ %s -verify + +// PR35939: MicrosoftMangle.cpp triggers an assertion failure on this test. 
+// UNSUPPORTED: system-windows + +// expected-no-diagnostics + +#include "Box.h" +#include "M1.h" +#include "M3.h" + +void Run() { + Box<> Present; +} diff --git a/test/Modules/odr_hash-blocks.cpp b/test/Modules/odr_hash-blocks.cpp new file mode 100644 index 000000000000..07dfa4ce2ac8 --- /dev/null +++ b/test/Modules/odr_hash-blocks.cpp @@ -0,0 +1,119 @@ +// Clear and create directories +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: mkdir %t/cache +// RUN: mkdir %t/Inputs + +// Build first header file +// RUN: echo "#define FIRST" >> %t/Inputs/first.h +// RUN: cat %s >> %t/Inputs/first.h + +// Build second header file +// RUN: echo "#define SECOND" >> %t/Inputs/second.h +// RUN: cat %s >> %t/Inputs/second.h + +// Test that each header can compile +// RUN: %clang_cc1 -fsyntax-only -x c++ -std=c++11 -fblocks %t/Inputs/first.h +// RUN: %clang_cc1 -fsyntax-only -x c++ -std=c++11 -fblocks %t/Inputs/second.h + +// Build module map file +// RUN: echo "module FirstModule {" >> %t/Inputs/module.map +// RUN: echo " header \"first.h\"" >> %t/Inputs/module.map +// RUN: echo "}" >> %t/Inputs/module.map +// RUN: echo "module SecondModule {" >> %t/Inputs/module.map +// RUN: echo " header \"second.h\"" >> %t/Inputs/module.map +// RUN: echo "}" >> %t/Inputs/module.map + +// Run test +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps \ +// RUN: -fmodules-cache-path=%t/cache -x c++ -I%t/Inputs \ +// RUN: -verify %s -std=c++11 -fblocks + +#if !defined(FIRST) && !defined(SECOND) +#include "first.h" +#include "second.h" +#endif + +// Used for testing +#if defined(FIRST) +#define ACCESS public: +#elif defined(SECOND) +#define ACCESS private: +#endif + +// TODO: S1, S2, and S3 should generate errors. 
+namespace Blocks { +#if defined(FIRST) +struct S1 { + void (^block)(int x) = ^(int x) { }; +}; +#elif defined(SECOND) +struct S1 { + void (^block)(int x) = ^(int y) { }; +}; +#else +S1 s1; +#endif + +#if defined(FIRST) +struct S2 { + int (^block)(int x) = ^(int x) { return x + 1; }; +}; +#elif defined(SECOND) +struct S2 { + int (^block)(int x) = ^(int x) { return x; }; +}; +#else +S2 s2; +#endif + +#if defined(FIRST) +struct S3 { + void run(int (^block)(int x)); +}; +#elif defined(SECOND) +struct S3 { + void run(int (^block)(int x, int y)); +}; +#else +S3 s3; +#endif + +#define DECLS \ + int (^block)(int x) = ^(int x) { return x + x; }; \ + void run(int (^block)(int x, int y)); + +#if defined(FIRST) || defined(SECOND) +struct Valid1 { + DECLS +}; +#else +Valid1 v1; +#endif + +#if defined(FIRST) || defined(SECOND) +struct Invalid1 { + DECLS + ACCESS +}; +#else +Invalid1 i1; +// expected-error@second.h:* {{'Blocks::Invalid1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}} +// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}} +#endif + +#undef DECLS +} + +// Keep macros contained to one file. 
+#ifdef FIRST +#undef FIRST +#endif + +#ifdef SECOND +#undef SECOND +#endif + +#ifdef ACCESS +#undef ACCESS +#endif diff --git a/test/Preprocessor/cuda-types.cu b/test/Preprocessor/cuda-types.cu index 9e96f6a15e6e..4ad3e4d97aa2 100644 --- a/test/Preprocessor/cuda-types.cu +++ b/test/Preprocessor/cuda-types.cu @@ -9,40 +9,40 @@ // RUN: %clang --cuda-host-only -nocudainc -target i386-unknown-linux-gnu -x cuda -E -dM -o - /dev/null \ // RUN: | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \ -// RUN: | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/i386-host-defines-filtered +// RUN: | grep -v '__LDBL\|_LONG_DOUBLE' > %t/i386-host-defines-filtered // RUN: %clang --cuda-device-only -nocudainc -nocudalib -target i386-unknown-linux-gnu -x cuda -E -dM -o - /dev/null \ // RUN: | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \ -// RUN: | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/i386-device-defines-filtered +// RUN: | grep -v '__LDBL\|_LONG_DOUBLE' > %t/i386-device-defines-filtered // RUN: diff %t/i386-host-defines-filtered %t/i386-device-defines-filtered // RUN: %clang --cuda-host-only -nocudainc -target x86_64-unknown-linux-gnu -x cuda -E -dM -o - /dev/null \ // RUN: | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \ -// RUN: | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/x86_64-host-defines-filtered +// RUN: | grep -v '__LDBL\|_LONG_DOUBLE' > %t/x86_64-host-defines-filtered // RUN: %clang --cuda-device-only -nocudainc -nocudalib -target x86_64-unknown-linux-gnu -x cuda -E -dM -o - /dev/null \ // RUN: | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \ -// RUN: | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/x86_64-device-defines-filtered +// RUN: | grep -v '__LDBL\|_LONG_DOUBLE' > %t/x86_64-device-defines-filtered // RUN: diff %t/x86_64-host-defines-filtered %t/x86_64-device-defines-filtered // RUN: %clang --cuda-host-only -nocudainc -target powerpc64-unknown-linux-gnu -x cuda 
-E -dM -o - /dev/null \ // RUN: | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \ -// RUN: | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/powerpc64-host-defines-filtered +// RUN: | grep -v '__LDBL\|_LONG_DOUBLE' > %t/powerpc64-host-defines-filtered // RUN: %clang --cuda-device-only -nocudainc -nocudalib -target powerpc64-unknown-linux-gnu -x cuda -E -dM -o - /dev/null \ // RUN: | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \ -// RUN: | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/powerpc64-device-defines-filtered +// RUN: | grep -v '__LDBL\|_LONG_DOUBLE' > %t/powerpc64-device-defines-filtered // RUN: diff %t/powerpc64-host-defines-filtered %t/powerpc64-device-defines-filtered // RUN: %clang --cuda-host-only -nocudainc -target i386-windows-msvc -x cuda -E -dM -o - /dev/null \ // RUN: | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \ -// RUN: | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/i386-msvc-host-defines-filtered +// RUN: | grep -v '__LDBL\|_LONG_DOUBLE' > %t/i386-msvc-host-defines-filtered // RUN: %clang --cuda-device-only -nocudainc -nocudalib -target i386-windows-msvc -x cuda -E -dM -o - /dev/null \ // RUN: | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \ -// RUN: | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/i386-msvc-device-defines-filtered +// RUN: | grep -v '__LDBL\|_LONG_DOUBLE' > %t/i386-msvc-device-defines-filtered // RUN: diff %t/i386-msvc-host-defines-filtered %t/i386-msvc-device-defines-filtered // RUN: %clang --cuda-host-only -nocudainc -target x86_64-windows-msvc -x cuda -E -dM -o - /dev/null \ // RUN: | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \ -// RUN: | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/x86_64-msvc-host-defines-filtered +// RUN: | grep -v '__LDBL\|_LONG_DOUBLE' > %t/x86_64-msvc-host-defines-filtered // RUN: %clang --cuda-device-only -nocudainc -nocudalib -target x86_64-windows-msvc -x cuda -E -dM -o 
- /dev/null \ // RUN: | grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)\|define __GCC_ATOMIC' \ -// RUN: | grep -v '__FLT128\|__LDBL\|_LONG_DOUBLE' > %t/x86_64-msvc-device-defines-filtered +// RUN: | grep -v '__LDBL\|_LONG_DOUBLE' > %t/x86_64-msvc-device-defines-filtered // RUN: diff %t/x86_64-msvc-host-defines-filtered %t/x86_64-msvc-device-defines-filtered diff --git a/test/Sema/_Float128.c b/test/Sema/_Float128.c deleted file mode 100644 index f0c3c6d555ef..000000000000 --- a/test/Sema/_Float128.c +++ /dev/null @@ -1,22 +0,0 @@ -// RUN: %clang_cc1 -verify %s -// RUN: %clang_cc1 -triple powerpc64-linux -verify %s -// RUN: %clang_cc1 -triple i686-windows-gnu -verify %s -// RUN: %clang_cc1 -triple x86_64-windows-gnu -verify %s -// RUN: %clang_cc1 -triple x86_64-windows-msvc -verify %s - -#if defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__) -_Float128 f; -_Float128 tiny = __FLT128_EPSILON__; -int g(int x, _Float128 *y) { - return x + *y; -} - -// expected-no-diagnostics -#else -_Float128 f; // expected-error {{__float128 is not supported on this target}} -float tiny = __FLT128_EPSILON__; // expected-error{{use of undeclared identifier}} -int g(int x, _Float128 *y) { // expected-error {{__float128 is not supported on this target}} - return x + *y; -} - -#endif // defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__) diff --git a/test/Sema/tautological-constant-compare.c b/test/Sema/tautological-constant-compare.c index 65aa7c9abdea..b242f35dc6cf 100644 --- a/test/Sema/tautological-constant-compare.c +++ b/test/Sema/tautological-constant-compare.c @@ -2,8 +2,8 @@ // RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wtautological-constant-in-range-compare -DTEST -verify -x c++ %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wtautological-type-limit-compare -DTEST -verify %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wtautological-type-limit-compare -DTEST -verify -x c++ %s -// RUN: %clang_cc1 -triple x86_64-linux-gnu 
-fsyntax-only -Wextra -Wno-sign-compare -DTEST -verify %s -// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wextra -Wno-sign-compare -DTEST -verify -x c++ %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wextra -Wno-sign-compare -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wextra -Wno-sign-compare -verify -x c++ %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wall -verify %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -Wall -verify -x c++ %s // RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -verify %s diff --git a/test/SemaCXX/cxx1z-class-template-argument-deduction.cpp b/test/SemaCXX/cxx1z-class-template-argument-deduction.cpp index 9080f67fe0e1..d21fbf289289 100644 --- a/test/SemaCXX/cxx1z-class-template-argument-deduction.cpp +++ b/test/SemaCXX/cxx1z-class-template-argument-deduction.cpp @@ -309,6 +309,17 @@ namespace dependent { template int New(int); } +namespace injected_class_name { + template struct A { + A(); + template A(A); + }; + A a; + A b = a; + using T = decltype(a); + using T = decltype(b); +} + #else // expected-no-diagnostics diff --git a/test/SemaTemplate/alignas.cpp b/test/SemaTemplate/alignas.cpp index 8a1f96e5bdec..680f07b32998 100644 --- a/test/SemaTemplate/alignas.cpp +++ b/test/SemaTemplate/alignas.cpp @@ -21,3 +21,14 @@ struct C { char a[16]; }; static_assert(sizeof(my_union) == 16, ""); static_assert(alignof(my_union) == 8, ""); + +namespace PR35028 { + template struct alignas(X) alignas(long long) alignas(long double) alignas(Alignment) Aligned { + union { + long long align1; + long double align2; + char data[sizeof(X)]; + }; + }; + Aligned a; +} diff --git a/test/SemaTemplate/cxx17-inline-variables.cpp b/test/SemaTemplate/cxx17-inline-variables.cpp index 9e6761ee57aa..7fc0aa8eeeb0 100644 --- a/test/SemaTemplate/cxx17-inline-variables.cpp +++ b/test/SemaTemplate/cxx17-inline-variables.cpp @@ -16,3 +16,14 @@ namespace CompleteType { constexpr int n = 
X::value; } + +template struct A { + static const int n; + static const int m; + constexpr int f() { return n; } + constexpr int g() { return n; } +}; +template constexpr int A::n = sizeof(A) + sizeof(T); +template inline constexpr int A::m = sizeof(A) + sizeof(T); +static_assert(A().f() == 5); +static_assert(A().g() == 5); diff --git a/unittests/Lex/LexerTest.cpp b/unittests/Lex/LexerTest.cpp index d699a44b13fd..317e2c836335 100644 --- a/unittests/Lex/LexerTest.cpp +++ b/unittests/Lex/LexerTest.cpp @@ -476,6 +476,8 @@ TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) { TEST_F(LexerTest, AvoidPastEndOfStringDereference) { std::vector LexedTokens = Lex(" // \\\n"); EXPECT_TRUE(LexedTokens.empty()); + EXPECT_TRUE(Lex("#include <\\\\").empty()); + EXPECT_TRUE(Lex("#include <\\\\\n").empty()); } TEST_F(LexerTest, StringizingRasString) { From b99ba46cc70e9300b052ce95aa36b4d05a482f7f Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 24 Jan 2018 20:25:48 +0000 Subject: [PATCH 3/6] Vendor import of compiler-rt release_60 branch r323338: https://llvm.org/svn/llvm-project/compiler-rt/branches/release_60@323338 --- lib/builtins/clear_cache.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/lib/builtins/clear_cache.c b/lib/builtins/clear_cache.c index 4a01cb46d4ac..451f1c0b1245 100644 --- a/lib/builtins/clear_cache.c +++ b/lib/builtins/clear_cache.c @@ -33,6 +33,11 @@ uintptr_t GetCurrentProcess(void); #include #endif +#if defined(__OpenBSD__) && defined(__mips__) + #include + #include +#endif + #if defined(__linux__) && defined(__mips__) #include #include @@ -142,6 +147,8 @@ void __clear_cache(void *start, void *end) { #else syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE); #endif +#elif defined(__mips__) && defined(__OpenBSD__) + cacheflush(start, (uintptr_t)end - (uintptr_t)start, BCACHE); #elif defined(__aarch64__) && !defined(__APPLE__) uint64_t xstart = (uint64_t)(uintptr_t) start; uint64_t xend = 
(uint64_t)(uintptr_t) end; @@ -156,12 +163,14 @@ void __clear_cache(void *start, void *end) { * uintptr_t in case this runs in an IPL32 environment. */ const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15); - for (addr = xstart; addr < xend; addr += dcache_line_size) + for (addr = xstart & ~(dcache_line_size - 1); addr < xend; + addr += dcache_line_size) __asm __volatile("dc cvau, %0" :: "r"(addr)); __asm __volatile("dsb ish"); const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15); - for (addr = xstart; addr < xend; addr += icache_line_size) + for (addr = xstart & ~(icache_line_size - 1); addr < xend; + addr += icache_line_size) __asm __volatile("ic ivau, %0" :: "r"(addr)); __asm __volatile("isb sy"); #elif defined (__powerpc64__) From 40e3ad2a19d821f293dbfc500beb6b9f7af41db2 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 24 Jan 2018 20:25:56 +0000 Subject: [PATCH 4/6] Vendor import of libc++ release_60 branch r323338: https://llvm.org/svn/llvm-project/libcxx/branches/release_60@323338 --- include/type_traits | 62 +++++++++++++++++-- .../meta.unary.prop/is_constructible.pass.cpp | 12 ++++ 2 files changed, 70 insertions(+), 4 deletions(-) diff --git a/include/type_traits b/include/type_traits index 7a6c992930e4..eb443ee0abd6 100644 --- a/include/type_traits +++ b/include/type_traits @@ -3172,6 +3172,14 @@ template false_type __is_constructible2_test(__any, _A0&, _A1&); +template +decltype((_Tp(_VSTD::declval<_A0>(), _VSTD::declval<_A1>(), _VSTD::declval<_A2>()), true_type())) +__is_constructible3_test(_Tp&, _A0&, _A1&, _A2&); + +template +false_type +__is_constructible3_test(__any, _A0&, _A1&, _A2&); + template struct __is_constructible0_imp // false, _Tp is not a scalar : public common_type @@ -3196,6 +3204,14 @@ struct __is_constructible2_imp // false, _Tp is not a scalar >::type {}; +template +struct __is_constructible3_imp // false, _Tp is not a scalar + : public common_type + < + decltype(__is_constructible3_test(declval<_Tp&>(), 
declval<_A0>(), declval<_A1>(), declval<_A2>())) + >::type + {}; + // handle scalars and reference types // Scalars are default constructible, references are not @@ -3215,6 +3231,11 @@ struct __is_constructible2_imp : public false_type {}; +template +struct __is_constructible3_imp + : public false_type + {}; + // Treat scalars and reference types separately template @@ -3235,6 +3256,12 @@ struct __is_constructible2_void_check _Tp, _A0, _A1> {}; +template +struct __is_constructible3_void_check + : public __is_constructible3_imp::value || is_reference<_Tp>::value, + _Tp, _A0, _A1, _A2> + {}; + // If any of T or Args is void, is_constructible should be false template @@ -3252,17 +3279,24 @@ struct __is_constructible2_void_check : public false_type {}; +template +struct __is_constructible3_void_check + : public false_type + {}; + // is_constructible entry point template + class _A1 = __is_construct::__nat, + class _A2 = __is_construct::__nat> struct _LIBCPP_TEMPLATE_VIS is_constructible - : public __is_constructible2_void_check::value + : public __is_constructible3_void_check::value || is_abstract<_Tp>::value || is_function<_Tp>::value || is_void<_A0>::value - || is_void<_A1>::value, - _Tp, _A0, _A1> + || is_void<_A1>::value + || is_void<_A2>::value, + _Tp, _A0, _A1, _A2> {}; template @@ -3282,6 +3316,16 @@ struct _LIBCPP_TEMPLATE_VIS is_constructible<_Tp, _A0, __is_construct::__nat> _Tp, _A0> {}; +template +struct _LIBCPP_TEMPLATE_VIS is_constructible<_Tp, _A0, _A1, __is_construct::__nat> + : public __is_constructible2_void_check::value + || is_abstract<_Tp>::value + || is_function<_Tp>::value + || is_void<_A0>::value + || is_void<_A1>::value, + _Tp, _A0, _A1> + {}; + // Array types are default constructible if their element type // is default constructible @@ -3300,6 +3344,11 @@ struct __is_constructible2_imp : public false_type {}; +template +struct __is_constructible3_imp + : public false_type + {}; + // Incomplete array types are not constructible template @@ 
-3317,6 +3366,11 @@ struct __is_constructible2_imp : public false_type {}; +template +struct __is_constructible3_imp + : public false_type + {}; + #endif // __has_feature(is_constructible) diff --git a/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp b/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp index 1f7c32a8cc07..b90363a5c380 100644 --- a/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp +++ b/test/std/utilities/meta/meta.unary/meta.unary.prop/is_constructible.pass.cpp @@ -30,6 +30,7 @@ struct A { explicit A(int); A(int, double); + A(int, long, double); #if TEST_STD_VER >= 11 private: #endif @@ -106,6 +107,16 @@ void test_is_constructible() #endif } +template +void test_is_constructible() +{ + static_assert(( std::is_constructible::value), ""); + LIBCPP11_STATIC_ASSERT((std::__libcpp_is_constructible::type::value), ""); +#if TEST_STD_VER > 14 + static_assert(( std::is_constructible_v), ""); +#endif +} + template void test_is_not_constructible() { @@ -146,6 +157,7 @@ int main() test_is_constructible (); test_is_constructible (); test_is_constructible (); + test_is_constructible (); test_is_constructible (); test_is_not_constructible (); From a506d0d6a9a6c2745e058e35ad4c62d1ddc5f20e Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 24 Jan 2018 20:26:03 +0000 Subject: [PATCH 5/6] Vendor import of lld release_60 branch r323338: https://llvm.org/svn/llvm-project/lld/branches/release_60@323338 --- COFF/Driver.cpp | 1 + ELF/LinkerScript.cpp | 28 ++++--- ELF/OutputSections.cpp | 9 --- ELF/OutputSections.h | 1 + ELF/ScriptParser.cpp | 11 +++ ELF/SymbolTable.cpp | 3 +- ELF/SyntheticSections.cpp | 7 +- ELF/Writer.cpp | 7 +- test/ELF/Inputs/as-needed-lazy.s | 3 + test/ELF/Inputs/compress-debug.s | 5 ++ test/ELF/as-needed-lazy.s | 14 ++++ test/ELF/compress-debug-sections-reloc.s | 26 ++++++ test/ELF/linkerscript/at-self-reference.s | 63 +++++++++++++++ test/ELF/linkerscript/at2.s | 
81 +++++++++++++++++++ .../compress-debug-sections-custom.s | 35 ++++++++ test/ELF/linkerscript/parse-section-in-addr.s | 10 +++ test/ELF/sysv-hash-no-rosegment.s | 13 +++ 17 files changed, 289 insertions(+), 28 deletions(-) create mode 100644 test/ELF/Inputs/as-needed-lazy.s create mode 100644 test/ELF/Inputs/compress-debug.s create mode 100644 test/ELF/as-needed-lazy.s create mode 100644 test/ELF/compress-debug-sections-reloc.s create mode 100644 test/ELF/linkerscript/at-self-reference.s create mode 100644 test/ELF/linkerscript/at2.s create mode 100644 test/ELF/linkerscript/compress-debug-sections-custom.s create mode 100644 test/ELF/linkerscript/parse-section-in-addr.s create mode 100644 test/ELF/sysv-hash-no-rosegment.s diff --git a/COFF/Driver.cpp b/COFF/Driver.cpp index 1aaec355c7a5..0f3d8fb0b4ef 100644 --- a/COFF/Driver.cpp +++ b/COFF/Driver.cpp @@ -57,6 +57,7 @@ bool link(ArrayRef Args, bool CanExitEarly, raw_ostream &Diag) { errorHandler().ErrorLimitExceededMsg = "too many errors emitted, stopping now" " (use /ERRORLIMIT:0 to see all errors)"; + errorHandler().ExitEarly = CanExitEarly; Config = make(); Config->Argv = {Args.begin(), Args.end()}; Config->CanExitEarly = CanExitEarly; diff --git a/ELF/LinkerScript.cpp b/ELF/LinkerScript.cpp index 8f50a977fd75..33a618952456 100644 --- a/ELF/LinkerScript.cpp +++ b/ELF/LinkerScript.cpp @@ -608,13 +608,6 @@ void LinkerScript::switchTo(OutputSection *Sec) { Ctx->OutSec = Sec; Ctx->OutSec->Addr = advance(0, Ctx->OutSec->Alignment); - - // If neither AT nor AT> is specified for an allocatable section, the linker - // will set the LMA such that the difference between VMA and LMA for the - // section is the same as the preceding output section in the same region - // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html - if (Ctx->LMAOffset) - Ctx->OutSec->LMAOffset = Ctx->LMAOffset(); } // This function searches for a memory region to place the given output @@ -662,17 +655,28 @@ void 
LinkerScript::assignOffsets(OutputSection *Sec) { if (Ctx->MemRegion) Dot = Ctx->MemRegionOffset[Ctx->MemRegion]; + switchTo(Sec); + if (Sec->LMAExpr) { uint64_t D = Dot; Ctx->LMAOffset = [=] { return Sec->LMAExpr().getValue() - D; }; } - switchTo(Sec); + if (!Sec->LMARegionName.empty()) { + if (MemoryRegion *MR = MemoryRegions.lookup(Sec->LMARegionName)) { + uint64_t Offset = MR->Origin - Dot; + Ctx->LMAOffset = [=] { return Offset; }; + } else { + error("memory region '" + Sec->LMARegionName + "' not declared"); + } + } - // We do not support custom layout for compressed debug sectons. - // At this point we already know their size and have compressed content. - if (Ctx->OutSec->Flags & SHF_COMPRESSED) - return; + // If neither AT nor AT> is specified for an allocatable section, the linker + // will set the LMA such that the difference between VMA and LMA for the + // section is the same as the preceding output section in the same region + // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html + if (Ctx->LMAOffset) + Ctx->OutSec->LMAOffset = Ctx->LMAOffset(); // The Size previously denoted how many InputSections had been added to this // section, and was used for sorting SHF_LINK_ORDER sections. Reset it to diff --git a/ELF/OutputSections.cpp b/ELF/OutputSections.cpp index f0677f7e1ca5..94c98284196f 100644 --- a/ELF/OutputSections.cpp +++ b/ELF/OutputSections.cpp @@ -183,15 +183,6 @@ template void OutputSection::maybeCompress() { !Name.startswith(".debug_")) return; - // Calculate the section offsets and size pre-compression. - Size = 0; - for (BaseCommand *Cmd : SectionCommands) - if (auto *ISD = dyn_cast(Cmd)) - for (InputSection *IS : ISD->Sections) { - IS->OutSecOff = alignTo(Size, IS->Alignment); - this->Size = IS->OutSecOff + IS->getSize(); - } - // Create a section header. 
ZDebugHeader.resize(sizeof(Elf_Chdr)); auto *Hdr = reinterpret_cast(ZDebugHeader.data()); diff --git a/ELF/OutputSections.h b/ELF/OutputSections.h index b2845773e9af..009f45c03333 100644 --- a/ELF/OutputSections.h +++ b/ELF/OutputSections.h @@ -99,6 +99,7 @@ class OutputSection final : public BaseCommand, public SectionBase { ConstraintKind Constraint = ConstraintKind::NoConstraint; std::string Location; std::string MemoryRegionName; + std::string LMARegionName; bool Noload = false; template void finalize(); diff --git a/ELF/ScriptParser.cpp b/ELF/ScriptParser.cpp index 4263944981f2..e068beeee262 100644 --- a/ELF/ScriptParser.cpp +++ b/ELF/ScriptParser.cpp @@ -709,6 +709,14 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) { if (consume(">")) Cmd->MemoryRegionName = next(); + if (consume("AT")) { + expect(">"); + Cmd->LMARegionName = next(); + } + + if (Cmd->LMAExpr && !Cmd->LMARegionName.empty()) + error("section can't have both LMA and a load region"); + Cmd->Phdrs = readOutputSectionPhdrs(); if (consume("=")) @@ -922,7 +930,10 @@ ByteCommand *ScriptParser::readByteCommand(StringRef Tok) { StringRef ScriptParser::readParenLiteral() { expect("("); + bool Orig = InExpr; + InExpr = false; StringRef Tok = next(); + InExpr = Orig; expect(")"); return Tok; } diff --git a/ELF/SymbolTable.cpp b/ELF/SymbolTable.cpp index b6bf21998863..c3a00bea4aaa 100644 --- a/ELF/SymbolTable.cpp +++ b/ELF/SymbolTable.cpp @@ -491,12 +491,13 @@ void SymbolTable::addShared(StringRef Name, SharedFile &File, if (WasInserted || ((S->isUndefined() || S->isLazy()) && S->getVisibility() == STV_DEFAULT)) { uint8_t Binding = S->Binding; + bool WasUndefined = S->isUndefined(); replaceSymbol(S, File, Name, Sym.getBinding(), Sym.st_other, Sym.getType(), Sym.st_value, Sym.st_size, Alignment, VerdefIndex); if (!WasInserted) { S->Binding = Binding; - if (!S->isWeak() && !Config->GcSections) + if (!S->isWeak() && !Config->GcSections && WasUndefined) File.IsNeeded = true; } } 
diff --git a/ELF/SyntheticSections.cpp b/ELF/SyntheticSections.cpp index f878acf8cf3f..a5a851f95400 100644 --- a/ELF/SyntheticSections.cpp +++ b/ELF/SyntheticSections.cpp @@ -1823,6 +1823,9 @@ void HashTableSection::finalizeContents() { } void HashTableSection::writeTo(uint8_t *Buf) { + // See comment in GnuHashTableSection::writeTo. + memset(Buf, 0, Size); + unsigned NumSymbols = InX::DynSymTab->getNumSymbols(); uint32_t *P = reinterpret_cast(Buf); @@ -2435,10 +2438,8 @@ void MergeNoTailSection::finalizeContents() { parallelForEachN(0, Concurrency, [&](size_t ThreadId) { for (MergeInputSection *Sec : Sections) { for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) { - if (!Sec->Pieces[I].Live) - continue; size_t ShardId = getShardId(Sec->Pieces[I].Hash); - if ((ShardId & (Concurrency - 1)) == ThreadId) + if ((ShardId & (Concurrency - 1)) == ThreadId && Sec->Pieces[I].Live) Sec->Pieces[I].OutputOff = Shards[ShardId].add(Sec->getData(I)); } } diff --git a/ELF/Writer.cpp b/ELF/Writer.cpp index 24c3e1ee207c..5feff456ffa9 100644 --- a/ELF/Writer.cpp +++ b/ELF/Writer.cpp @@ -427,13 +427,14 @@ template void Writer::run() { if (errorCount()) return; + Script->assignAddresses(); + // If -compressed-debug-sections is specified, we need to compress // .debug_* sections. Do it right now because it changes the size of // output sections. 
- parallelForEach(OutputSections, - [](OutputSection *Sec) { Sec->maybeCompress(); }); + for (OutputSection *Sec : OutputSections) + Sec->maybeCompress(); - Script->assignAddresses(); Script->allocateHeaders(Phdrs); // Remove empty PT_LOAD to avoid causing the dynamic linker to try to mmap a diff --git a/test/ELF/Inputs/as-needed-lazy.s b/test/ELF/Inputs/as-needed-lazy.s new file mode 100644 index 000000000000..7f9c360dda20 --- /dev/null +++ b/test/ELF/Inputs/as-needed-lazy.s @@ -0,0 +1,3 @@ +.global foo +foo: + nop diff --git a/test/ELF/Inputs/compress-debug.s b/test/ELF/Inputs/compress-debug.s new file mode 100644 index 000000000000..5fd9d39a98a0 --- /dev/null +++ b/test/ELF/Inputs/compress-debug.s @@ -0,0 +1,5 @@ +.text +.fill 0x44 + +.section .debug_info,"",@progbits +.fill 0x43 diff --git a/test/ELF/as-needed-lazy.s b/test/ELF/as-needed-lazy.s new file mode 100644 index 000000000000..e892b9980aad --- /dev/null +++ b/test/ELF/as-needed-lazy.s @@ -0,0 +1,14 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t1.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/as-needed-lazy.s -o %t2.o +# RUN: ld.lld %t2.o -o %t2.so -shared +# RUN: rm -f %t2.a +# RUN: llvm-ar rc %t2.a %t2.o +# RUN: ld.lld %t1.o %t2.a --as-needed %t2.so -o %t +# RUN: llvm-readobj -d %t | FileCheck %s + +# CHECK-NOT: NEEDED + +.global _start +_start: + nop diff --git a/test/ELF/compress-debug-sections-reloc.s b/test/ELF/compress-debug-sections-reloc.s new file mode 100644 index 000000000000..b4ee4ea6dd97 --- /dev/null +++ b/test/ELF/compress-debug-sections-reloc.s @@ -0,0 +1,26 @@ +# REQUIRES: x86, zlib + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %S/Inputs/compress-debug.s -o %t2.o +# RUN: ld.lld %t2.o %t.o -o %t1 --compress-debug-sections=zlib -Ttext=0 +# RUN: llvm-dwarfdump %t1 -debug-str | FileCheck %s +# These two checks correspond to the patched values of a_sym 
and a_debug_sym. +# D = 0x44 - address of .text input section for this file (the start address of +# .text is 0 as requested on the command line, and the size of the +# preceding .text in the other input file is 0x44). +# C = 0x43 - offset of .debug_info section for this file (the size of +# the preceding .debug_info from the other input file is 0x43). +# CHECK: 0x00000000: "D" +# CHECK: 0x00000004: "C" + +.text +a_sym: +nop + +.section .debug_str,"",@progbits +.long a_sym +.long a_debug_sym + +.section .debug_info,"",@progbits +a_debug_sym: +.long 0x88776655 diff --git a/test/ELF/linkerscript/at-self-reference.s b/test/ELF/linkerscript/at-self-reference.s new file mode 100644 index 000000000000..7208a4b9fcd4 --- /dev/null +++ b/test/ELF/linkerscript/at-self-reference.s @@ -0,0 +1,63 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t +# RUN: echo "SECTIONS { \ +# RUN: . = 0x1000; \ +# RUN: .aaa : AT(ADDR(.aaa)) { *(.aaa) } \ +# RUN: .bbb : AT(ADDR(.bbb)) { *(.bbb) } \ +# RUN: }" > %t.script +# RUN: ld.lld %t --script %t.script -o %t2 +# RUN: llvm-readobj -program-headers %t2 | FileCheck %s + +# CHECK: ProgramHeaders [ +# CHECK-NEXT: ProgramHeader { +# CHECK-NEXT: Type: PT_LOAD (0x1) +# CHECK-NEXT: Offset: 0x1000 +# CHECK-NEXT: VirtualAddress: 0x1000 +# CHECK-NEXT: PhysicalAddress: 0x1000 +# CHECK-NEXT: FileSize: 3 +# CHECK-NEXT: MemSize: 3 +# CHECK-NEXT: Flags [ (0x5) +# CHECK-NEXT: PF_R (0x4) +# CHECK-NEXT: PF_X (0x1) +# CHECK-NEXT: ] +# CHECK-NEXT: Alignment: 4096 +# CHECK-NEXT: } +# CHECK-NEXT: ProgramHeader { +# CHECK-NEXT: Type: PT_LOAD (0x1) +# CHECK-NEXT: Offset: 0x1008 +# CHECK-NEXT: VirtualAddress: 0x1008 +# CHECK-NEXT: PhysicalAddress: 0x1008 +# CHECK-NEXT: FileSize: 9 +# CHECK-NEXT: MemSize: 9 +# CHECK-NEXT: Flags [ (0x5) +# CHECK-NEXT: PF_R (0x4) +# CHECK-NEXT: PF_X (0x1) +# CHECK-NEXT: ] +# CHECK-NEXT: Alignment: 4096 +# CHECK-NEXT: } +# CHECK-NEXT: ProgramHeader { +# CHECK-NEXT: Type: PT_GNU_STACK (0x6474E551) +# 
CHECK-NEXT: Offset: 0x0 +# CHECK-NEXT: VirtualAddress: 0x0 +# CHECK-NEXT: PhysicalAddress: 0x0 +# CHECK-NEXT: FileSize: 0 +# CHECK-NEXT: MemSize: 0 +# CHECK-NEXT: Flags [ (0x6) +# CHECK-NEXT: PF_R (0x4) +# CHECK-NEXT: PF_W (0x2) +# CHECK-NEXT: ] +# CHECK-NEXT: Alignment: 0 +# CHECK-NEXT: } +# CHECK-NEXT:] + +.global _start +_start: + nop + + +.section .aaa, "a" +.asciz "aa" + +.section .bbb, "a" +.align 8 +.quad 0 diff --git a/test/ELF/linkerscript/at2.s b/test/ELF/linkerscript/at2.s new file mode 100644 index 000000000000..1545b1d826a3 --- /dev/null +++ b/test/ELF/linkerscript/at2.s @@ -0,0 +1,81 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t +# RUN: echo "MEMORY { \ +# RUN: AX (ax) : ORIGIN = 0x2000, LENGTH = 0x100 \ +# RUN: AW (aw) : ORIGIN = 0x3000, LENGTH = 0x100 \ +# RUN: FLASH (ax) : ORIGIN = 0x6000, LENGTH = 0x100 \ +# RUN: RAM (aw) : ORIGIN = 0x7000, LENGTH = 0x100 } \ +# RUN: SECTIONS { \ +# RUN: .foo1 : { *(.foo1) } > AX AT>FLASH \ +# RUN: .foo2 : { *(.foo2) } > AX \ +# RUN: .bar1 : { *(.bar1) } > AW AT> RAM \ +# RUN: .bar2 : { *(.bar2) } > AW AT > RAM \ +# RUN: .bar3 : { *(.bar3) } > AW AT >RAM \ +# RUN: }" > %t.script +# RUN: ld.lld %t --script %t.script -o %t2 +# RUN: llvm-readobj -program-headers %t2 | FileCheck %s +# RUN: llvm-objdump -section-headers %t2 | FileCheck %s --check-prefix=SECTIONS + +# CHECK: ProgramHeaders [ +# CHECK-NEXT: ProgramHeader { +# CHECK-NEXT: Type: PT_LOAD +# CHECK-NEXT: Offset: 0x1000 +# CHECK-NEXT: VirtualAddress: 0x2000 +# CHECK-NEXT: PhysicalAddress: 0x6000 +# CHECK-NEXT: FileSize: 16 +# CHECK-NEXT: MemSize: 16 +# CHECK-NEXT: Flags [ +# CHECK-NEXT: PF_R +# CHECK-NEXT: PF_X +# CHECK-NEXT: ] +# CHECK-NEXT: Alignment: +# CHECK-NEXT: } +# CHECK-NEXT: ProgramHeader { +# CHECK-NEXT: Type: PT_LOAD +# CHECK-NEXT: Offset: 0x2000 +# CHECK-NEXT: VirtualAddress: 0x3000 +# CHECK-NEXT: PhysicalAddress: 0x7000 +# CHECK-NEXT: FileSize: 24 +# CHECK-NEXT: MemSize: 24 +# CHECK-NEXT: Flags [ +# 
CHECK-NEXT: PF_R +# CHECK-NEXT: PF_W +# CHECK-NEXT: ] +# CHECK-NEXT: Alignment: 4096 +# CHECK-NEXT: } + +# SECTIONS: Sections: +# SECTIONS-NEXT: Idx Name Size Address +# SECTIONS-NEXT: 0 00000000 0000000000000000 +# SECTIONS-NEXT: 1 .foo1 00000008 0000000000002000 +# SECTIONS-NEXT: 2 .foo2 00000008 0000000000002008 +# SECTIONS-NEXT: 3 .text 00000000 0000000000002010 +# SECTIONS-NEXT: 4 .bar1 00000008 0000000000003000 +# SECTIONS-NEXT: 5 .bar2 00000008 0000000000003008 +# SECTIONS-NEXT: 6 .bar3 00000008 0000000000003010 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t +# RUN: echo "MEMORY { \ +# RUN: FLASH (ax) : ORIGIN = 0x2000, LENGTH = 0x100 \ +# RUN: RAM (aw) : ORIGIN = 0x5000, LENGTH = 0x100 } \ +# RUN: SECTIONS { \ +# RUN: .foo1 : AT(0x500) { *(.foo1) } > FLASH AT>FLASH \ +# RUN: }" > %t2.script +# RUN: not ld.lld %t --script %t2.script -o %t2 2>&1 | \ +# RUN: FileCheck %s --check-prefix=ERR +# ERR: error: section can't have both LMA and a load region + +.section .foo1, "ax" +.quad 0 + +.section .foo2, "ax" +.quad 0 + +.section .bar1, "aw" +.quad 0 + +.section .bar2, "aw" +.quad 0 + +.section .bar3, "aw" +.quad 0 diff --git a/test/ELF/linkerscript/compress-debug-sections-custom.s b/test/ELF/linkerscript/compress-debug-sections-custom.s new file mode 100644 index 000000000000..31fdd56381b0 --- /dev/null +++ b/test/ELF/linkerscript/compress-debug-sections-custom.s @@ -0,0 +1,35 @@ +# REQUIRES: x86, zlib + +# RUN: echo "SECTIONS { \ +# RUN: .text : { . += 0x10; *(.text) } \ +# RUN: .debug_str : { . += 0x10; *(.debug_str) } \ +# RUN: .debug_info : { . 
+= 0x10; *(.debug_info) } \ +# RUN: }" > %t.script + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %S/../Inputs/compress-debug.s -o %t2.o +# RUN: ld.lld %t2.o %t.o -o %t1 --compress-debug-sections=zlib -T %t.script +# RUN: llvm-dwarfdump %t1 -debug-str | FileCheck %s +# These two checks correspond to the patched values of a_sym and a_debug_sym. +# T = 0x54 - address of .text input section for this file (the start address of +# .text is 0 by default, the size of the preceding .text in the other input +# file is 0x44, and the linker script adds an additional 0x10). +# S = 0x53 - offset of .debug_info section for this file (the size of +# the preceding .debug_info from the other input file is 0x43, and the +# linker script adds an additional 0x10). +# Also note that the .debug_str offsets are also offset by 0x10, as directed by +# the linker script. +# CHECK: 0x00000010: "T" +# CHECK: 0x00000014: "S" + +.text +a_sym: +nop + +.section .debug_str,"",@progbits +.long a_sym +.long a_debug_sym + +.section .debug_info,"",@progbits +a_debug_sym: +.long 0x88776655 diff --git a/test/ELF/linkerscript/parse-section-in-addr.s b/test/ELF/linkerscript/parse-section-in-addr.s new file mode 100644 index 000000000000..7a79f646310d --- /dev/null +++ b/test/ELF/linkerscript/parse-section-in-addr.s @@ -0,0 +1,10 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o + +# RUN: echo "SECTIONS { \ +# RUN: .foo-bar : AT(ADDR(.foo-bar)) { *(.text) } \ +# RUN: }" > %t.script +# RUN: ld.lld -o %t.so --script %t.script %t.o -shared +# RUN: llvm-readelf -S %t.so | FileCheck %s + +# CHECK: .foo-bar diff --git a/test/ELF/sysv-hash-no-rosegment.s b/test/ELF/sysv-hash-no-rosegment.s new file mode 100644 index 000000000000..31b9d2fbec05 --- /dev/null +++ b/test/ELF/sysv-hash-no-rosegment.s @@ -0,0 +1,13 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o +# 
RUN: ld.lld -shared --no-rosegment -o %t %t.o +# RUN: llvm-readobj -hash-table %t | FileCheck %s + +# CHECK: HashTable { +# CHECK-NEXT: Num Buckets: 2 +# CHECK-NEXT: Num Chains: 2 +# CHECK-NEXT: Buckets: [1, 0] +# CHECK-NEXT: Chains: [0, 0] +# CHECK-NEXT: } + +callq undef@PLT From a2cf70158c66891c9c041270b450e9699b0439fb Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 24 Jan 2018 20:26:12 +0000 Subject: [PATCH 6/6] Vendor import of lldb release_60 branch r323338: https://llvm.org/svn/llvm-project/lldb/branches/release_60@323338 --- lit/CMakeLists.txt | 4 ++++ lit/lit.cfg | 4 ++-- lit/lit.site.cfg.in | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/lit/CMakeLists.txt b/lit/CMakeLists.txt index 5488154318a9..03fe3d881e9c 100644 --- a/lit/CMakeLists.txt +++ b/lit/CMakeLists.txt @@ -11,6 +11,10 @@ else() set(ENABLE_SHARED 0) endif(BUILD_SHARED_LIBS) +# the value is not canonicalized within LLVM +llvm_canonicalize_cmake_booleans( + LLVM_ENABLE_ZLIB) + configure_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg) diff --git a/lit/lit.cfg b/lit/lit.cfg index 402d03947ca8..4a190a7328ba 100644 --- a/lit/lit.cfg +++ b/lit/lit.cfg @@ -91,11 +91,11 @@ for pattern in [r"\bFileCheck\b", pattern) tool_pipe = tool_match.group(2) tool_name = tool_match.group(4) - tool_path = lit.util.which(tool_name, config.llvm_tools_dir) + tool_path = lit.util.which(tool_name, config.environment['PATH']) if not tool_path: # Warn, but still provide a substitution. 
lit_config.note( - 'Did not find ' + tool_name + ' in ' + config.llvm_tools_dir) + 'Did not find ' + tool_name + ' in ' + config.environment['PATH']) config.substitutions.append((pattern, tool_pipe + tool_path)) # Shell execution diff --git a/lit/lit.site.cfg.in b/lit/lit.site.cfg.in index 2cfa677651a1..c6550877751f 100644 --- a/lit/lit.site.cfg.in +++ b/lit/lit.site.cfg.in @@ -12,7 +12,7 @@ config.target_triple = "@TARGET_TRIPLE@" config.python_executable = "@PYTHON_EXECUTABLE@" config.cc = "@LLDB_TEST_C_COMPILER@" config.cxx = "@LLDB_TEST_CXX_COMPILER@" -config.have_zlib = @HAVE_LIBZ@ +config.have_zlib = @LLVM_ENABLE_ZLIB@ # Support substitution of the tools and libs dirs with user parameters. This is # used when we can't determine the tool dir at configuration time.