From dadbdfff07596fc3b48cc1e735181b9b8c893f67 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Fri, 22 Jan 2016 21:16:09 +0000 Subject: [PATCH 1/2] Vendor import of llvm release_38 branch r258549: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258549 --- include/llvm/CodeGen/MachineFunction.h | 2 +- include/llvm/CodeGen/SelectionDAGNodes.h | 15 + include/llvm/IR/GlobalValue.h | 4 + include/llvm/Transforms/Utils/Local.h | 19 + .../llvm/Transforms/Utils/SimplifyLibCalls.h | 2 - lib/CodeGen/AsmPrinter/DebugLocEntry.h | 13 +- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 18 + lib/CodeGen/CodeGenPrepare.cpp | 35 +- lib/CodeGen/MachineFunction.cpp | 2 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 44 +- lib/IR/Globals.cpp | 44 +- lib/Target/AArch64/AArch64ISelLowering.cpp | 9 +- .../MCTargetDesc/AArch64ELFStreamer.cpp | 14 +- lib/Target/ARM/ARMISelLowering.cpp | 9 +- lib/Target/X86/X86CallingConv.td | 4 +- lib/Target/X86/X86FrameLowering.cpp | 18 +- lib/Target/X86/X86ISelLowering.cpp | 9 +- .../InstCombine/InstCombineAndOrXor.cpp | 187 +------ lib/Transforms/Utils/InlineFunction.cpp | 331 ++++++++++++- lib/Transforms/Utils/Local.cpp | 235 ++++++++- lib/Transforms/Utils/SimplifyLibCalls.cpp | 55 ++- test/CodeGen/AArch64/cxx-tlscc.ll | 27 ++ test/CodeGen/ARM/cse-flags.ll | 43 ++ test/CodeGen/ARM/cxx-tlscc.ll | 11 + test/CodeGen/ARM/memfunc.ll | 18 +- test/CodeGen/X86/2014-05-30-CombineAddNSW.ll | 20 - test/CodeGen/X86/cxx_tlscc64.ll | 27 ++ test/CodeGen/X86/x86-shrink-wrap-unwind.ll | 83 +++- test/DebugInfo/ARM/PR26163.ll | 107 ++++ .../MCJIT/remote/cross-module-a.ll | 2 +- .../MCJIT/remote/multi-module-a.ll | 2 +- .../MCJIT/remote/simpletest-remote.ll | 2 +- .../MCJIT/remote/stubs-remote.ll | 2 +- .../remote/test-common-symbols-remote.ll | 2 +- .../MCJIT/remote/test-data-align-remote.ll | 2 +- .../test-fp-no-external-funcs-remote.ll | 2 +- .../remote/test-global-init-nonzero-remote.ll | 2 +- .../remote/test-global-init-nonzero-sm-pic.ll | 2 +- .../MCJIT/remote/test-ptr-reloc-remote.ll | 2 +- .../MCJIT/remote/test-ptr-reloc-sm-pic.ll | 2 +- .../OrcMCJIT/remote/cross-module-a.ll | 2 +- .../OrcMCJIT/remote/multi-module-a.ll | 2 +- .../OrcMCJIT/remote/simpletest-remote.ll | 2 +- .../OrcMCJIT/remote/stubs-remote.ll | 2 +- .../remote/test-common-symbols-remote.ll | 2 +- .../OrcMCJIT/remote/test-data-align-remote.ll | 2 +- .../test-fp-no-external-funcs-remote.ll | 2 +- .../remote/test-global-init-nonzero-remote.ll | 2 +- .../remote/test-global-init-nonzero-sm-pic.ll | 2 +- .../OrcMCJIT/remote/test-ptr-reloc-remote.ll | 2 +- .../OrcMCJIT/remote/test-ptr-reloc-sm-pic.ll | 2 +- test/MC/AArch64/inst-directive.s | 15 +- .../ARM/bitreverse-recognize.ll | 37 ++ .../CodeGenPrepare/ARM/lit.local.cfg | 3 + .../CodeGenPrepare/bitreverse-hang.ll | 53 ++ test/Transforms/Inline/inline-funclets.ll | 455 ++++++++++++++++++ .../Transforms/InstCombine/bitreverse-hang.ll | 53 ++ .../InstCombine/bitreverse-recognize.ll | 114 ----- test/Transforms/InstCombine/cos-2.ll | 16 +- .../InstCombine/double-float-shrink-1.ll | 20 + tools/lli/lli.cpp | 5 +- utils/release/test-release.sh | 21 +- 62 files changed, 1772 insertions(+), 469 deletions(-) create mode 100644 test/CodeGen/ARM/cse-flags.ll delete mode 100644 test/CodeGen/X86/2014-05-30-CombineAddNSW.ll create mode 100644 test/DebugInfo/ARM/PR26163.ll create mode 100644 test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll create mode 100644 test/Transforms/CodeGenPrepare/ARM/lit.local.cfg create mode 100644 test/Transforms/CodeGenPrepare/bitreverse-hang.ll create 
mode 100644 test/Transforms/Inline/inline-funclets.ll create mode 100644 test/Transforms/InstCombine/bitreverse-hang.ll delete mode 100644 test/Transforms/InstCombine/bitreverse-recognize.ll diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index 82c30d39afd6..df7c951743c9 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -295,7 +295,7 @@ class MachineFunction { } /// Should we be emitting segmented stack stuff for the function - bool shouldSplitStack(); + bool shouldSplitStack() const; /// getNumBlockIDs - Return the number of MBB ID's allocated. /// diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 23816bde07c0..536fc656e8e2 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -369,6 +369,18 @@ struct SDNodeFlags { (UnsafeAlgebra << 3) | (NoNaNs << 4) | (NoInfs << 5) | (NoSignedZeros << 6) | (AllowReciprocal << 7); } + + /// Clear any flags in this flag set that aren't also set in Flags. + void intersectWith(const SDNodeFlags *Flags) { + NoUnsignedWrap &= Flags->NoUnsignedWrap; + NoSignedWrap &= Flags->NoSignedWrap; + Exact &= Flags->Exact; + UnsafeAlgebra &= Flags->UnsafeAlgebra; + NoNaNs &= Flags->NoNaNs; + NoInfs &= Flags->NoInfs; + NoSignedZeros &= Flags->NoSignedZeros; + AllowReciprocal &= Flags->AllowReciprocal; + } }; /// Represents one node in the SelectionDAG. @@ -682,6 +694,9 @@ class SDNode : public FoldingSetNode, public ilist_node { /// and directly, but it is not to avoid creating a vtable for this class. const SDNodeFlags *getFlags() const; + /// Clear any flags in this node that aren't also set in Flags. + void intersectFlagsWith(const SDNodeFlags *Flags); + /// Return the number of values defined/returned by this operator. unsigned getNumValues() const { return NumValues; } diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h index 4fa4e7daeab0..fa6469aa0ade 100644 --- a/include/llvm/IR/GlobalValue.h +++ b/include/llvm/IR/GlobalValue.h @@ -346,6 +346,10 @@ class GlobalValue : public Constant { return !(isDeclarationForLinker() || isWeakForLinker()); } + // Returns true if the alignment of the value can be unilaterally + // increased. + bool canIncreaseAlignment() const; + /// This method unlinks 'this' from the containing module, but does not delete /// it. virtual void removeFromParent() = 0; diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index 911c6f14da0b..3ae01657a2ec 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -331,6 +331,25 @@ unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, /// during lowering by the GC infrastructure. bool callsGCLeafFunction(ImmutableCallSite CS); +//===----------------------------------------------------------------------===// +// Intrinsic pattern matching +// + +/// Try and match a bitreverse or bswap idiom. +/// +/// If an idiom is matched, an intrinsic call is inserted before \c I. Any added +/// instructions are returned in \c InsertedInsts. They will all have been added +/// to a basic block. +/// +/// A bitreverse idiom normally requires around 2*BW nodes to be searched (where +/// BW is the bitwidth of the integer type). A bswap idiom requires anywhere up +/// to BW / 4 nodes to be searched, so is significantly faster. +/// +/// This function returns true on a successful match or false otherwise. 
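/// (Editorial illustration, not part of the original patch: one idiom this
/// routine is meant to match, written as the equivalent C++ expression with a
/// hypothetical helper name, is a 32-bit byte swap assembled from shifts,
/// masks, and ors:
///
///   uint32_t bswap32(uint32_t X) {
///     return (X >> 24) | ((X >> 8) & 0xFF00u) |
///            ((X << 8) & 0xFF0000u) | (X << 24);
///   }
///
/// In IR this is a tree of or/shl/lshr/and nodes rooted at an 'or', which is
/// why matching starts from an Or instruction.)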
+bool recognizeBitReverseOrBSwapIdiom( + Instruction *I, bool MatchBSwaps, bool MatchBitReversals, + SmallVectorImpl &InsertedInsts); + } // End llvm namespace #endif diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h index 410a075aeb98..fc34f49a1255 100644 --- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -125,8 +125,6 @@ class LibCallSimplifier { Value *optimizeStringMemoryLibCall(CallInst *CI, IRBuilder<> &B); // Math Library Optimizations - Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, bool CheckRetType); - Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B); Value *optimizeCos(CallInst *CI, IRBuilder<> &B); Value *optimizePow(CallInst *CI, IRBuilder<> &B); Value *optimizeExp2(CallInst *CI, IRBuilder<> &B); diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h index bbe53249a084..b60ab9151ef2 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -93,18 +93,7 @@ class DebugLocEntry { /// variable, merge them by appending Next's values to the current /// list of values. /// Return true if the merge was successful. - bool MergeValues(const DebugLocEntry &Next) { - if (Begin == Next.Begin) { - auto *Expr = cast_or_null(Values[0].Expression); - auto *NextExpr = cast_or_null(Next.Values[0].Expression); - if (Expr->isBitPiece() && NextExpr->isBitPiece()) { - addValues(Next.Values); - End = Next.End; - return true; - } - } - return false; - } + bool MergeValues(const DebugLocEntry &Next); /// \brief Attempt to merge this DebugLocEntry with Next and return /// true if the merge was successful. Entries can be merged if they diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index a4fb07eacb3b..ae62b6b19a42 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -805,6 +805,24 @@ static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2) { return (l1 < r2) && (l2 < r1); } +/// \brief If this and Next are describing different pieces of the same +/// variable, merge them by appending Next's values to the current +/// list of values. +/// Return true if the merge was successful. +bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) { + if (Begin == Next.Begin) { + auto *Expr = cast_or_null(Values[0].Expression); + auto *NextExpr = cast_or_null(Next.Values[0].Expression); + if (Expr->isBitPiece() && NextExpr->isBitPiece() && + !piecesOverlap(Expr, NextExpr)) { + addValues(Next.Values); + End = Next.End; + return true; + } + } + return false; +} + /// Build the location list for all DBG_VALUEs in the function that /// describe the same variable. If the ranges of several independent /// pieces of the same variable overlap partially, split them up and diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 03e57787307a..c8007a524e70 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -1742,8 +1742,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { // over-aligning global variables that have an explicit section is // forbidden. 
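  // (Editorial example, not part of the patch: a global such as
  //    static char Buf[16] __attribute__((section("packed")));
  //  may be laid out back-to-back with other objects in that section, so
  //  raising its alignment could introduce padding the section layout does
  //  not expect -- the reason the check below is conservative about
  //  sectioned globals.)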
GlobalVariable *GV; - if ((GV = dyn_cast(Val)) && GV->hasUniqueInitializer() && - !GV->hasSection() && GV->getAlignment() < PrefAlign && + if ((GV = dyn_cast(Val)) && GV->canIncreaseAlignment() && + GV->getAlignment() < PrefAlign && DL->getTypeAllocSize(GV->getType()->getElementType()) >= MinSize + Offset2) GV->setAlignment(PrefAlign); @@ -5211,6 +5211,24 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { return false; } +/// Given an OR instruction, check to see if this is a bitreverse +/// idiom. If so, insert the new intrinsic and return true. +static bool makeBitReverse(Instruction &I, const DataLayout &DL, + const TargetLowering &TLI) { + if (!I.getType()->isIntegerTy() || + !TLI.isOperationLegalOrCustom(ISD::BITREVERSE, + TLI.getValueType(DL, I.getType(), true))) + return false; + + SmallVector Insts; + if (!recognizeBitReverseOrBSwapIdiom(&I, false, true, Insts)) + return false; + Instruction *LastInst = Insts.back(); + I.replaceAllUsesWith(LastInst); + RecursivelyDeleteTriviallyDeadInstructions(&I); + return true; +} + // In this pass we look for GEP and cast instructions that are used // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. @@ -5224,8 +5242,19 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) { if (ModifiedDT) return true; } - MadeChange |= dupRetToEnableTailCallOpts(&BB); + bool MadeBitReverse = true; + while (TLI && MadeBitReverse) { + MadeBitReverse = false; + for (auto &I : reverse(BB)) { + if (makeBitReverse(I, *DL, *TLI)) { + MadeBitReverse = MadeChange = true; + break; + } + } + } + MadeChange |= dupRetToEnableTailCallOpts(&BB); + return MadeChange; } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index ca4bb1c6ad49..f6604f38722a 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -163,7 +163,7 @@ getOrCreateJumpTableInfo(unsigned EntryKind) { } /// Should we be emitting segmented stack stuff for the function -bool MachineFunction::shouldSplitStack() { +bool MachineFunction::shouldSplitStack() const { return getFunction()->hasFnAttribute("split-stack"); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 96bf914701c6..893871f94485 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -377,22 +377,6 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID, } } -/// Add logical or fast math flag values to FoldingSetNodeID value. -static void AddNodeIDFlags(FoldingSetNodeID &ID, unsigned Opcode, - const SDNodeFlags *Flags) { - if (!isBinOpWithFlags(Opcode)) - return; - - unsigned RawFlags = 0; - if (Flags) - RawFlags = Flags->getRawFlags(); - ID.AddInteger(RawFlags); -} - -static void AddNodeIDFlags(FoldingSetNodeID &ID, const SDNode *N) { - AddNodeIDFlags(ID, N->getOpcode(), N->getFlags()); -} - static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC, SDVTList VTList, ArrayRef OpList) { AddNodeIDOpcode(ID, OpC); @@ -528,8 +512,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { } } // end switch (N->getOpcode()) - AddNodeIDFlags(ID, N); - // Target specific memory nodes could also have address spaces to check. 
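// (Editorial sketch, not part of the patch: with AddNodeIDFlags removed, two
// binops that differ only in their flags now fold to one node, so the
// surviving node's flags must be narrowed to what both nodes guaranteed.
// Assuming the usual SDNodeFlags setters, the semantics are:
//
//   SDNodeFlags A, B;
//   A.setNoSignedWrap(true);
//   A.setExact(true);        // first node: nsw + exact
//   B.setNoSignedWrap(true); // second node: nsw only
//   A.intersectWith(&B);     // A keeps nsw; exact is cleared, since only
//                            // the first node could promise it.
//
// This is what the FindNodeOrInsertPos callers below do via
// intersectFlagsWith.)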
if (N->isTargetMemoryOpcode()) ID.AddInteger(cast(N)->getPointerInfo().getAddrSpace()); @@ -851,6 +833,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); + if (Node) + if (const SDNodeFlags *Flags = N->getFlags()) + Node->intersectFlagsWith(Flags); return Node; } @@ -869,6 +854,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); + if (Node) + if (const SDNodeFlags *Flags = N->getFlags()) + Node->intersectFlagsWith(Flags); return Node; } @@ -886,6 +874,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef Ops, AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); + if (Node) + if (const SDNodeFlags *Flags = N->getFlags()) + Node->intersectFlagsWith(Flags); return Node; } @@ -3892,10 +3883,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue Ops[] = {N1, N2}; FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); - AddNodeIDFlags(ID, Opcode, Flags); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) { + if (Flags) + E->intersectFlagsWith(Flags); return SDValue(E, 0); + } N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags); @@ -6249,10 +6242,12 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops); - AddNodeIDFlags(ID, Opcode, Flags); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP)) { + if (Flags) + E->intersectFlagsWith(Flags); return E; + } } return nullptr; } @@ -6948,6 +6943,11 @@ const SDNodeFlags *SDNode::getFlags() const { return nullptr; } +void SDNode::intersectFlagsWith(const SDNodeFlags *Flags) { + if (auto *FlagsNode = dyn_cast(this)) + FlagsNode->Flags.intersectWith(Flags); +} + SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { assert(N->getNumValues() == 1 && "Can't unroll a vector with multiple results!"); diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp index 6159f93faf89..a61b62bd9687 100644 --- a/lib/IR/Globals.cpp +++ b/lib/IR/Globals.cpp @@ -12,11 +12,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/GlobalValue.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Triple.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" @@ -134,6 +135,47 @@ bool GlobalValue::isDeclaration() const { return false; } +bool GlobalValue::canIncreaseAlignment() const { + // Firstly, can only increase the alignment of a global if it + // is a strong definition. + if (!isStrongDefinitionForLinker()) + return false; + + // It also has to either not have a section defined, or, not have + // alignment specified. 
(If it is assigned a section, the global + // could be densely packed with other objects in the section, and + // increasing the alignment could cause padding issues.) + if (hasSection() && getAlignment() > 0) + return false; + + // On ELF platforms, we're further restricted in that we can't + // increase the alignment of any variable which might be emitted + // into a shared library, and which is exported. If the main + // executable accesses a variable found in a shared-lib, the main + // exe actually allocates memory for and exports the symbol ITSELF, + // overriding the symbol found in the library. That is, at link + // time, the observed alignment of the variable is copied into the + // executable binary. (A COPY relocation is also generated, to copy + // the initial data from the shadowed variable in the shared-lib + // into the location in the main binary, before running code.) + // + // And thus, even though you might think you are defining the + // global, and allocating the memory for the global in your object + // file, and thus should be able to set the alignment arbitrarily, + // that's not actually true. Doing so can cause an ABI breakage; an + // executable might have already been built with the previous + // alignment of the variable, and then assuming an increased + // alignment will be incorrect. + + // Conservatively assume ELF if there's no parent pointer. + bool isELF = + (!Parent || Triple(Parent->getTargetTriple()).isOSBinFormatELF()); + if (isELF && hasDefaultVisibility() && !hasLocalLinkage()) + return false; + + return true; +} + //===----------------------------------------------------------------------===// // GlobalVariable Implementation //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 4ecfbe9e2280..9b73c5e9d952 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10133,6 +10133,7 @@ void AArch64TargetLowering::insertCopiesSplitCSR( const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); + MachineBasicBlock::iterator MBBI = Entry->begin(); for (const MCPhysReg *I = IStart; *I; ++I) { const TargetRegisterClass *RC = nullptr; if (AArch64::GPR64RegClass.contains(*I)) @@ -10152,13 +10153,13 @@ void AArch64TargetLowering::insertCopiesSplitCSR( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); - BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), - NewVR) + BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(*I); + // Insert the copy-back instructions right before the terminator. for (auto *Exit : Exits) - BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), - *I) + BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::COPY), *I) .addReg(NewVR); } } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index d26604f5765d..685907a2178e 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -112,9 +112,21 @@ class AArch64ELFStreamer : public MCELFStreamer { MCELFStreamer::EmitInstruction(Inst, STI); } + /// Emit a 32-bit value as an instruction. 
This is only used for the .inst + /// directive, EmitInstruction should be used in other cases. void emitInst(uint32_t Inst) { + char Buffer[4]; + + // We can't just use EmitIntValue here, as that will emit a data mapping + // symbol, and swap the endianness on big-endian systems (instructions are + // always little-endian). + for (unsigned I = 0; I < 4; ++I) { + Buffer[I] = uint8_t(Inst); + Inst >>= 8; + } + EmitA64MappingSymbol(); - MCELFStreamer::EmitIntValue(Inst, 4); + MCELFStreamer::EmitBytes(StringRef(Buffer, 4)); } /// This is one of the functions used to emit data into an ELF section, so the diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 37c0795af283..978e99cf511e 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -12423,6 +12423,7 @@ void ARMTargetLowering::insertCopiesSplitCSR( const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); + MachineBasicBlock::iterator MBBI = Entry->begin(); for (const MCPhysReg *I = IStart; *I; ++I) { const TargetRegisterClass *RC = nullptr; if (ARM::GPRRegClass.contains(*I)) @@ -12442,13 +12443,13 @@ void ARMTargetLowering::insertCopiesSplitCSR( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); - BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), - NewVR) + BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(*I); + // Insert the copy-back instructions right before the terminator. for (auto *Exit : Exits) - BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), - *I) + BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::COPY), *I) .addReg(NewVR); } } diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index e8b96e74a7af..ed2e88067168 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -832,10 +832,10 @@ def CSR_64_TLS_Darwin : CalleeSavedRegs<(add CSR_64, RCX, RDX, RSI, R8, R9, R10, R11)>; // CSRs that are handled by prologue, epilogue. -def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add)>; +def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add RBP)>; // CSRs that are handled explicitly via copies. -def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(add CSR_64_TLS_Darwin)>; +def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(sub CSR_64_TLS_Darwin, RBP)>; // All GPRs - except r11 def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI, diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 8632bb8254f9..7f8ce4768c00 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -2031,6 +2031,10 @@ void X86FrameLowering::adjustForSegmentedStacks( unsigned TlsReg, TlsOffset; DebugLoc DL; + // To support shrink-wrapping we would need to insert the new blocks + // at the right place and update the branches to PrologueMBB. 
+ assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet"); + unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true); assert(!MF.getRegInfo().isLiveIn(ScratchReg) && "Scratch register is live-in"); @@ -2271,6 +2275,11 @@ void X86FrameLowering::adjustForHiPEPrologue( MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { MachineFrameInfo *MFI = MF.getFrameInfo(); DebugLoc DL; + + // To support shrink-wrapping we would need to insert the new blocks + // at the right place and update the branches to PrologueMBB. + assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet"); + // HiPE-specific values const unsigned HipeLeafWords = 24; const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5; @@ -2584,7 +2593,14 @@ bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { // If we may need to emit frameless compact unwind information, give // up as this is currently broken: PR25614. - return MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) || hasFP(MF); + return (MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) || hasFP(MF)) && + // The lowering of segmented stack and HiPE only support entry blocks + // as prologue blocks: PR26107. + // This limitation may be lifted if we fix: + // - adjustForSegmentedStacks + // - adjustForHiPEPrologue + MF.getFunction()->getCallingConv() != CallingConv::HiPE && + !MF.shouldSplitStack(); } MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers( diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b723059f091d..6904714ec781 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -28908,6 +28908,7 @@ void X86TargetLowering::insertCopiesSplitCSR( const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); + MachineBasicBlock::iterator MBBI = Entry->begin(); for (const MCPhysReg *I = IStart; *I; ++I) { const TargetRegisterClass *RC = nullptr; if (X86::GR64RegClass.contains(*I)) @@ -28925,13 +28926,13 @@ void X86TargetLowering::insertCopiesSplitCSR( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); - BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), - NewVR) + BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(*I); + // Insert the copy-back instructions right before the terminator. for (auto *Exit : Exits) - BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), - *I) + BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::COPY), *I) .addReg(NewVR); } } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 95c50d32c820..76cefd97cd8f 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Transforms/Utils/CmpInstAnalysis.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; using namespace PatternMatch; @@ -1565,190 +1566,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return Changed ? &I : nullptr; } - -/// Analyze the specified subexpression and see if it is capable of providing -/// pieces of a bswap or bitreverse. 
The subexpression provides a potential -/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in -/// the output of the expression came from a corresponding bit in some other -/// value. This function is recursive, and the end result is a mapping of -/// (value, bitnumber) to bitnumber. It is the caller's responsibility to -/// validate that all `value`s are identical and that the bitnumber to bitnumber -/// mapping is correct for a bswap or bitreverse. -/// -/// For example, if the current subexpression if "(shl i32 %X, 24)" then we know -/// that the expression deposits the low byte of %X into the high byte of the -/// result and that all other bits are zero. This expression is accepted, -/// BitValues[24-31] are set to %X and BitProvenance[24-31] are set to [0-7]. -/// -/// This function returns true if the match was unsuccessful and false if so. -/// On entry to the function the "OverallLeftShift" is a signed integer value -/// indicating the number of bits that the subexpression is later shifted. For -/// example, if the expression is later right shifted by 16 bits, the -/// OverallLeftShift value would be -16 on entry. This is used to specify which -/// bits of BitValues are actually being set. -/// -/// Similarly, BitMask is a bitmask where a bit is clear if its corresponding -/// bit is masked to zero by a user. For example, in (X & 255), X will be -/// processed with a bytemask of 255. BitMask is always in the local -/// (OverallLeftShift) coordinate space. -/// -static bool CollectBitParts(Value *V, int OverallLeftShift, APInt BitMask, - SmallVectorImpl &BitValues, - SmallVectorImpl &BitProvenance) { - if (Instruction *I = dyn_cast(V)) { - // If this is an or instruction, it may be an inner node of the bswap. - if (I->getOpcode() == Instruction::Or) - return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, - BitValues, BitProvenance) || - CollectBitParts(I->getOperand(1), OverallLeftShift, BitMask, - BitValues, BitProvenance); - - // If this is a logical shift by a constant, recurse with OverallLeftShift - // and BitMask adjusted. - if (I->isLogicalShift() && isa(I->getOperand(1))) { - unsigned ShAmt = - cast(I->getOperand(1))->getLimitedValue(~0U); - // Ensure the shift amount is defined. - if (ShAmt > BitValues.size()) - return true; - - unsigned BitShift = ShAmt; - if (I->getOpcode() == Instruction::Shl) { - // X << C -> collect(X, +C) - OverallLeftShift += BitShift; - BitMask = BitMask.lshr(BitShift); - } else { - // X >>u C -> collect(X, -C) - OverallLeftShift -= BitShift; - BitMask = BitMask.shl(BitShift); - } - - if (OverallLeftShift >= (int)BitValues.size()) - return true; - if (OverallLeftShift <= -(int)BitValues.size()) - return true; - - return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, - BitValues, BitProvenance); - } - - // If this is a logical 'and' with a mask that clears bits, clear the - // corresponding bits in BitMask. - if (I->getOpcode() == Instruction::And && - isa(I->getOperand(1))) { - unsigned NumBits = BitValues.size(); - APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1); - const APInt &AndMask = cast(I->getOperand(1))->getValue(); - - for (unsigned i = 0; i != NumBits; ++i, Bit <<= 1) { - // If this bit is masked out by a later operation, we don't care what - // the and mask is. - if (BitMask[i] == 0) - continue; - - // If the AndMask is zero for this bit, clear the bit. - APInt MaskB = AndMask & Bit; - if (MaskB == 0) { - BitMask.clearBit(i); - continue; - } - - // Otherwise, this bit is kept. 
- } - - return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, - BitValues, BitProvenance); - } - } - - // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be - // the input value to the bswap/bitreverse. To be part of a bswap or - // bitreverse we must be demanding a contiguous range of bits from it. - unsigned InputBitLen = BitMask.countPopulation(); - unsigned InputBitNo = BitMask.countTrailingZeros(); - if (BitMask.getBitWidth() - BitMask.countLeadingZeros() - InputBitNo != - InputBitLen) - // Not a contiguous set range of bits! - return true; - - // We know we're moving a contiguous range of bits from the input to the - // output. Record which bits in the output came from which bits in the input. - unsigned DestBitNo = InputBitNo + OverallLeftShift; - for (unsigned I = 0; I < InputBitLen; ++I) - BitProvenance[DestBitNo + I] = InputBitNo + I; - - // If the destination bit value is already defined, the values are or'd - // together, which isn't a bswap/bitreverse (unless it's an or of the same - // bits). - if (BitValues[DestBitNo] && BitValues[DestBitNo] != V) - return true; - for (unsigned I = 0; I < InputBitLen; ++I) - BitValues[DestBitNo + I] = V; - - return false; -} - -static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To, - unsigned BitWidth) { - if (From % 8 != To % 8) - return false; - // Convert from bit indices to byte indices and check for a byte reversal. - From >>= 3; - To >>= 3; - BitWidth >>= 3; - return From == BitWidth - To - 1; -} - -static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, - unsigned BitWidth) { - return From == BitWidth - To - 1; -} - /// Given an OR instruction, check to see if this is a bswap or bitreverse /// idiom. If so, insert the new intrinsic and return it. Instruction *InstCombiner::MatchBSwapOrBitReverse(BinaryOperator &I) { - IntegerType *ITy = dyn_cast(I.getType()); - if (!ITy) - return nullptr; // Can't do vectors. - unsigned BW = ITy->getBitWidth(); - - /// We keep track of which bit (BitProvenance) inside which value (BitValues) - /// defines each bit in the result. - SmallVector BitValues(BW, nullptr); - SmallVector BitProvenance(BW, -1); - - // Try to find all the pieces corresponding to the bswap. - APInt BitMask = APInt::getAllOnesValue(BitValues.size()); - if (CollectBitParts(&I, 0, BitMask, BitValues, BitProvenance)) + SmallVector Insts; + if (!recognizeBitReverseOrBSwapIdiom(&I, true, false, Insts)) return nullptr; + Instruction *LastInst = Insts.pop_back_val(); + LastInst->removeFromParent(); - // Check to see if all of the bits come from the same value. - Value *V = BitValues[0]; - if (!V) return nullptr; // Didn't find a bit? Must be zero. - - if (!std::all_of(BitValues.begin(), BitValues.end(), - [&](const Value *X) { return X == V; })) - return nullptr; - - // Now, is the bit permutation correct for a bswap or a bitreverse? We can - // only byteswap values with an even number of bytes. 
-  bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true;;
-  for (unsigned i = 0, e = BitValues.size(); i != e; ++i) {
-    OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW);
-    OKForBitReverse &=
-        bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW);
-  }
-
-  Intrinsic::ID Intrin;
-  if (OKForBSwap)
-    Intrin = Intrinsic::bswap;
-  else if (OKForBitReverse)
-    Intrin = Intrinsic::bitreverse;
-  else
-    return nullptr;
-
-  Function *F = Intrinsic::getDeclaration(I.getModule(), Intrin, ITy);
-  return CallInst::Create(F, V);
+
+  for (auto *Inst : Insts)
+    Worklist.Add(Inst);
+  return LastInst;
 }
 
 /// We have an expression of the form (A&C)|(B&D). Check if A is (cond?-1:0)
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 14574119b9a8..79282a2a703b 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -179,13 +179,244 @@ void LandingPadInliningInfo::forwardResume(
   RI->eraseFromParent();
 }
 
+/// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
+static Value *getParentPad(Value *EHPad) {
+  if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad))
+    return FPI->getParentPad();
+  return cast<CatchSwitchInst>(EHPad)->getParentPad();
+}
+
+typedef DenseMap<Instruction *, Value *> UnwindDestMemoTy;
+
+/// Helper for getUnwindDestToken that does the descendant-ward part of
+/// the search.
+static Value *getUnwindDestTokenHelper(Instruction *EHPad,
+                                       UnwindDestMemoTy &MemoMap) {
+  SmallVector<Instruction *, 8> Worklist(1, EHPad);
+
+  while (!Worklist.empty()) {
+    Instruction *CurrentPad = Worklist.pop_back_val();
+    // We only put pads on the worklist that aren't in the MemoMap. When
+    // we find an unwind dest for a pad we may update its ancestors, but
+    // the queue only ever contains uncles/great-uncles/etc. of CurrentPad,
+    // so they should never get updated while queued on the worklist.
+    assert(!MemoMap.count(CurrentPad));
+    Value *UnwindDestToken = nullptr;
+    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) {
+      if (CatchSwitch->hasUnwindDest()) {
+        UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI();
+      } else {
+        // Catchswitch doesn't have a 'nounwind' variant, and one might be
+        // annotated as "unwinds to caller" when really it's nounwind (see
+        // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the
+        // parent's unwind dest from this. We can check its catchpads'
+        // descendants, since they might include a cleanuppad with an
+        // "unwinds to caller" cleanupret, which can be trusted.
+        for (auto HI = CatchSwitch->handler_begin(),
+                  HE = CatchSwitch->handler_end();
+             HI != HE && !UnwindDestToken; ++HI) {
+          BasicBlock *HandlerBlock = *HI;
+          auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI());
+          for (User *Child : CatchPad->users()) {
+            // Intentionally ignore invokes here -- since the catchswitch is
+            // marked "unwind to caller", it would be a verifier error if it
+            // contained an invoke which unwinds out of it, so any invoke we'd
+            // encounter must unwind to some child of the catch.
+            if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child))
+              continue;
+
+            Instruction *ChildPad = cast<Instruction>(Child);
+            auto Memo = MemoMap.find(ChildPad);
+            if (Memo == MemoMap.end()) {
+              // Haven't figured out this child pad yet; queue it.
+              Worklist.push_back(ChildPad);
+              continue;
+            }
+            // We've already checked this child, but might have found that
+            // it offers no proof either way.
+            Value *ChildUnwindDestToken = Memo->second;
+            if (!ChildUnwindDestToken)
+              continue;
+            // We already know the child's unwind dest, which can either
+            // be ConstantTokenNone to indicate unwind to caller, or can
+            // be another child of the catchpad. Only the former indicates
+            // the unwind dest of the catchswitch.
+            if (isa<ConstantTokenNone>(ChildUnwindDestToken)) {
+              UnwindDestToken = ChildUnwindDestToken;
+              break;
+            }
+            assert(getParentPad(ChildUnwindDestToken) == CatchPad);
+          }
+        }
+      }
+    } else {
+      auto *CleanupPad = cast<CleanupPadInst>(CurrentPad);
+      for (User *U : CleanupPad->users()) {
+        if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
+          if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest())
+            UnwindDestToken = RetUnwindDest->getFirstNonPHI();
+          else
+            UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext());
+          break;
+        }
+        Value *ChildUnwindDestToken;
+        if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
+          ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI();
+        } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) {
+          Instruction *ChildPad = cast<Instruction>(U);
+          auto Memo = MemoMap.find(ChildPad);
+          if (Memo == MemoMap.end()) {
+            // Haven't resolved this child yet; queue it and keep searching.
+            Worklist.push_back(ChildPad);
+            continue;
+          }
+          // We've checked this child, but still need to ignore it if it
+          // had no proof either way.
+          ChildUnwindDestToken = Memo->second;
+          if (!ChildUnwindDestToken)
+            continue;
+        } else {
+          // Not a relevant user of the cleanuppad
+          continue;
+        }
+        // In a well-formed program, the child/invoke must either unwind to
+        // an(other) child of the cleanup, or exit the cleanup. In the
+        // first case, continue searching.
+        if (isa<Instruction>(ChildUnwindDestToken) &&
+            getParentPad(ChildUnwindDestToken) == CleanupPad)
+          continue;
+        UnwindDestToken = ChildUnwindDestToken;
+        break;
+      }
+    }
+    // If we haven't found an unwind dest for CurrentPad, we may have queued its
+    // children, so move on to the next in the worklist.
+    if (!UnwindDestToken)
+      continue;
+
+    // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits
+    // any ancestors of CurrentPad up to but not including UnwindDestToken's
+    // parent pad. Record this in the memo map, and check to see if the
+    // original EHPad being queried is one of the ones exited.
+    Value *UnwindParent;
+    if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken))
+      UnwindParent = getParentPad(UnwindPad);
+    else
+      UnwindParent = nullptr;
+    bool ExitedOriginalPad = false;
+    for (Instruction *ExitedPad = CurrentPad;
+         ExitedPad && ExitedPad != UnwindParent;
+         ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) {
+      // Skip over catchpads since they just follow their catchswitches.
+      if (isa<CatchPadInst>(ExitedPad))
+        continue;
+      MemoMap[ExitedPad] = UnwindDestToken;
+      ExitedOriginalPad |= (ExitedPad == EHPad);
+    }
+
+    if (ExitedOriginalPad)
+      return UnwindDestToken;
+
+    // Continue the search.
+  }
+
+  // No definitive information is contained within this funclet.
+  return nullptr;
+}
+
+/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
+/// return that pad instruction. If it unwinds to caller, return
+/// ConstantTokenNone. If it does not have a definitive unwind destination,
+/// return nullptr.
+///
+/// This routine gets invoked for calls in funclets in inlinees when inlining
+/// an invoke. Since many funclets don't have calls inside them, it's queried
+/// on-demand rather than building a map of pads to unwind dests up front.
+/// Determining a funclet's unwind dest may require recursively searching its
+/// descendants, and also ancestors and cousins if the descendants don't provide
+/// an answer. Since most funclets will have their unwind dest immediately
+/// available as the unwind dest of a catchswitch or cleanupret, this routine
+/// searches top-down from the given pad and then up. To avoid worst-case
+/// quadratic run-time given that approach, it uses a memo map to avoid
+/// re-processing funclet trees. The callers that rewrite the IR as they go
+/// take advantage of this, for correctness, by checking/forcing rewritten
+/// pads' entries to match the original callee view.
+static Value *getUnwindDestToken(Instruction *EHPad,
+                                 UnwindDestMemoTy &MemoMap) {
+  // Catchpads unwind to the same place as their catchswitch;
+  // redirect any queries on catchpads so the code below can
+  // deal with just catchswitches and cleanuppads.
+  if (auto *CPI = dyn_cast<CatchPadInst>(EHPad))
+    EHPad = CPI->getCatchSwitch();
+
+  // Check if we've already determined the unwind dest for this pad.
+  auto Memo = MemoMap.find(EHPad);
+  if (Memo != MemoMap.end())
+    return Memo->second;
+
+  // Search EHPad and, if necessary, its descendants.
+  Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
+  assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
+  if (UnwindDestToken)
+    return UnwindDestToken;
+
+  // No information is available for this EHPad from itself or any of its
+  // descendants. An unwind all the way out to a pad in the caller would
+  // need also to agree with the unwind dest of the parent funclet, so
+  // search up the chain to try to find a funclet with information. Put
+  // null entries in the memo map to avoid re-processing as we go up.
+  MemoMap[EHPad] = nullptr;
+  Instruction *LastUselessPad = EHPad;
+  Value *AncestorToken;
+  for (AncestorToken = getParentPad(EHPad);
+       auto *AncestorPad = dyn_cast<Instruction>(AncestorToken);
+       AncestorToken = getParentPad(AncestorToken)) {
+    // Skip over catchpads since they just follow their catchswitches.
+    if (isa<CatchPadInst>(AncestorPad))
+      continue;
+    assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
+    auto AncestorMemo = MemoMap.find(AncestorPad);
+    if (AncestorMemo == MemoMap.end()) {
+      UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap);
+    } else {
+      UnwindDestToken = AncestorMemo->second;
+    }
+    if (UnwindDestToken)
+      break;
+    LastUselessPad = AncestorPad;
+  }
+
+  // Since the whole tree under LastUselessPad has no information, it all must
+  // match UnwindDestToken; record that to avoid repeating the search.
+  SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
+  while (!Worklist.empty()) {
+    Instruction *UselessPad = Worklist.pop_back_val();
+    assert(!MemoMap.count(UselessPad) || MemoMap[UselessPad] == nullptr);
+    MemoMap[UselessPad] = UnwindDestToken;
+    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) {
+      for (BasicBlock *HandlerBlock : CatchSwitch->handlers())
+        for (User *U : HandlerBlock->getFirstNonPHI()->users())
+          if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U))
+            Worklist.push_back(cast<Instruction>(U));
+    } else {
+      assert(isa<CleanupPadInst>(UselessPad));
+      for (User *U : UselessPad->users())
+        if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U))
+          Worklist.push_back(cast<Instruction>(U));
+    }
+  }
+
+  return UnwindDestToken;
+}
+
 /// When we inline a basic block into an invoke,
 /// we have to turn all of the calls that can throw into invokes.
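/// (Editorial illustration, not part of the original patch: at the source
/// level, inlining the hypothetical
///   void inlinee() { may_throw(); }
/// into
///   try { inlinee(); } catch (...) { handle(); }
/// must keep may_throw() on the exceptional path to handle(); at the IR level
/// that means each such call becomes an invoke whose unwind edge targets the
/// caller's unwind destination, which is what this function does.)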
/// This function analyzes BB to see if there are any calls, and if so,
/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
/// nodes in that block with the values specified in InvokeDestPHIValues.
-static BasicBlock *
-HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) {
+static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
+    BasicBlock *BB, BasicBlock *UnwindEdge,
+    UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
   for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
     Instruction *I = &*BBI++;
@@ -196,6 +427,31 @@ HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) {
     if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
       continue;
+    if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
+      // This call is nested inside a funclet. If that funclet has an unwind
+      // destination within the inlinee, then unwinding out of this call would
+      // be UB. Rewriting this call to an invoke which targets the inlined
+      // invoke's unwind dest would give the call's parent funclet multiple
+      // unwind destinations, which is something that subsequent EH table
+      // generation can't handle and that the verifier rejects. So when we
+      // see such a call, leave it as a call.
+      auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]);
+      Value *UnwindDestToken =
+          getUnwindDestToken(FuncletPad, *FuncletUnwindMap);
+      if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
+        continue;
+#ifndef NDEBUG
+      Instruction *MemoKey;
+      if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
+        MemoKey = CatchPad->getCatchSwitch();
+      else
+        MemoKey = FuncletPad;
+      assert(FuncletUnwindMap->count(MemoKey) &&
+             (*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
+             "must get memoized to avoid confusing later searches");
+#endif // NDEBUG
+    }
+
     // Convert this function call into an invoke instruction. First, split the
     // basic block.
     BasicBlock *Split =
@@ -328,13 +584,23 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
   // This connects all the instructions which 'unwind to caller' to the invoke
   // destination.
+  UnwindDestMemoTy FuncletUnwindMap;
   for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
        BB != E; ++BB) {
     if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
       if (CRI->unwindsToCaller()) {
-        CleanupReturnInst::Create(CRI->getCleanupPad(), UnwindDest, CRI);
+        auto *CleanupPad = CRI->getCleanupPad();
+        CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI);
         CRI->eraseFromParent();
         UpdatePHINodes(&*BB);
+        // Finding a cleanupret with an unwind destination would confuse
+        // subsequent calls to getUnwindDestToken, so map the cleanuppad
+        // to short-circuit any such calls and recognize this as an "unwind
+        // to caller" cleanup.
+        assert(!FuncletUnwindMap.count(CleanupPad) ||
+               isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
+        FuncletUnwindMap[CleanupPad] =
+            ConstantTokenNone::get(Caller->getContext());
       }
     }
@@ -345,12 +611,41 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
     Instruction *Replacement = nullptr;
     if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
       if (CatchSwitch->unwindsToCaller()) {
+        Value *UnwindDestToken;
+        if (auto *ParentPad =
+                dyn_cast<Instruction>(CatchSwitch->getParentPad())) {
+          // This catchswitch is nested inside another funclet. If that
+          // funclet has an unwind destination within the inlinee, then
+          // unwinding out of this catchswitch would be UB. Rewriting this
+          // catchswitch to unwind to the inlined invoke's unwind dest would
+          // give the parent funclet multiple unwind destinations, which is
+          // something that subsequent EH table generation can't handle and
+          // that the verifier rejects. So when we see such a catchswitch,
+          // leave it as "unwind to caller".
+          UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap);
+          if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
+            continue;
+        } else {
+          // This catchswitch has no parent to inherit constraints from, and
+          // none of its descendants can have an unwind edge that exits it and
+          // targets another funclet in the inlinee. It may or may not have a
+          // descendant that definitively has an unwind to caller. In either
+          // case, we'll have to assume that any unwinds out of it may need to
+          // be routed to the caller, so treat it as though it has a definitive
+          // unwind to caller.
+          UnwindDestToken = ConstantTokenNone::get(Caller->getContext());
+        }
         auto *NewCatchSwitch = CatchSwitchInst::Create(
             CatchSwitch->getParentPad(), UnwindDest,
             CatchSwitch->getNumHandlers(), CatchSwitch->getName(),
             CatchSwitch);
         for (BasicBlock *PadBB : CatchSwitch->handlers())
           NewCatchSwitch->addHandler(PadBB);
+        // Propagate info for the old catchswitch over to the new one in
+        // the unwind map. This also serves to short-circuit any subsequent
+        // checks for the unwind dest of this catchswitch, which would get
+        // confused if they found the outer handler in the callee.
+        FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;
         Replacement = NewCatchSwitch;
       }
     } else if (!isa<FuncletPadInst>(I)) {
@@ -369,8 +664,8 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
   for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
        BB != E; ++BB)
-    if (BasicBlock *NewBB =
-            HandleCallsInBlockInlinedThroughInvoke(&*BB, UnwindDest))
+    if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
+            &*BB, UnwindDest, &FuncletUnwindMap))
       // Update any PHI nodes in the exceptional block to indicate that there
       // is now a new entry in them.
       UpdatePHINodes(NewBB);
@@ -1415,6 +1710,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
     }
   }
+  // If we are inlining for an invoke instruction, we must make sure to rewrite
+  // any call instructions into invoke instructions. This is sensitive to which
+  // funclet pads were top-level in the inlinee, so must be done before
+  // rewriting the "parent pad" links.
+  if (auto *II = dyn_cast<InvokeInst>(TheCall)) {
+    BasicBlock *UnwindDest = II->getUnwindDest();
+    Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI();
+    if (isa<LandingPadInst>(FirstNonPHI)) {
+      HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo);
+    } else {
+      HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo);
+    }
+  }
+
   // Update the lexical scopes of the new funclets and callsites.
   // Anything that had 'none' as its parent is now nested inside the callsite's
   // EHPad.
@@ -1472,18 +1781,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
     }
   }
-  // If we are inlining for an invoke instruction, we must make sure to rewrite
-  // any call instructions into invoke instructions.
-  if (auto *II = dyn_cast<InvokeInst>(TheCall)) {
-    BasicBlock *UnwindDest = II->getUnwindDest();
-    Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI();
-    if (isa<LandingPadInst>(FirstNonPHI)) {
-      HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo);
-    } else {
-      HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo);
-    }
-  }
-
  // Handle any inlined musttail call sites. In order for a new call site to be
  // musttail, the source of the clone and the inlined call site must have been
  // musttail. Therefore it's safe to return without merging control into the
  // phony successor block.
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index d2793e5ecb5b..abc9b65f7a39 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -944,37 +944,44 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
 static unsigned enforceKnownAlignment(Value *V, unsigned Align,
                                       unsigned PrefAlign, const DataLayout &DL) {
+  assert(PrefAlign > Align);
+
   V = V->stripPointerCasts();
   if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+    // TODO: ideally, computeKnownBits ought to have used
+    // AllocaInst::getAlignment() in its computation already, making
+    // the below max redundant. But, as it turns out,
+    // stripPointerCasts recurses through infinite layers of bitcasts,
+    // while computeKnownBits is not allowed to traverse more than 6
+    // levels.
+    Align = std::max(AI->getAlignment(), Align);
+    if (PrefAlign <= Align)
+      return Align;
+
     // If the preferred alignment is greater than the natural stack alignment
     // then don't round up. This avoids dynamic stack realignment.
     if (DL.exceedsNaturalStackAlignment(PrefAlign))
       return Align;
-    // If there is a requested alignment and if this is an alloca, round up.
-    if (AI->getAlignment() >= PrefAlign)
-      return AI->getAlignment();
     AI->setAlignment(PrefAlign);
     return PrefAlign;
   }
   if (auto *GO = dyn_cast<GlobalObject>(V)) {
+    // TODO: as above, this shouldn't be necessary.
+    Align = std::max(GO->getAlignment(), Align);
+    if (PrefAlign <= Align)
+      return Align;
+
     // If there is a large requested alignment and we can, bump up the alignment
     // of the global. If the memory we set aside for the global may not be the
    // memory used by the final program then it is impossible for us to reliably
    // enforce the preferred alignment.
-    if (!GO->isStrongDefinitionForLinker())
+    if (!GO->canIncreaseAlignment())
      return Align;
-    if (GO->getAlignment() >= PrefAlign)
-      return GO->getAlignment();
-    // We can only increase the alignment of the global if it has no alignment
-    // specified or if it is not assigned a section. If it is assigned a
-    // section, the global could be densely packed with other objects in the
-    // section, increasing the alignment could cause padding issues.
-    if (!GO->hasSection() || GO->getAlignment() == 0)
-      GO->setAlignment(PrefAlign);
-    return GO->getAlignment();
+    GO->setAlignment(PrefAlign);
+    return PrefAlign;
  }
  return Align;
@@ -1585,3 +1592,205 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {
   return false;
 }
+
+/// A potential constituent of a bitreverse or bswap expression. See
+/// collectBitParts for a fuller explanation.
+struct BitPart {
+  BitPart(Value *P, unsigned BW) : Provider(P) {
+    Provenance.resize(BW);
+  }
+
+  /// The Value that this is a bitreverse/bswap of.
+  Value *Provider;
+  /// The "provenance" of each bit. Provenance[A] = B means that bit A
+  /// in Provider becomes bit B in the result of this expression.
+  SmallVector<int8_t, 32> Provenance; // int8_t means max size is i128.
+
+  enum { Unset = -1 };
+};
+
+/// Analyze the specified subexpression and see if it is capable of providing
+/// pieces of a bswap or bitreverse. The subexpression provides a potential
+/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in
+/// the output of the expression came from a corresponding bit in some other
+/// value. This function is recursive, and the end result is a mapping of
+/// bitnumber to bitnumber. It is the caller's responsibility to validate that
+/// the bitnumber to bitnumber mapping is correct for a bswap or bitreverse.
+///
+/// For example, if the current subexpression is "(shl i32 %X, 24)" then we know
+/// that the expression deposits the low byte of %X into the high byte of the
+/// result and that all other bits are zero. This expression is accepted and a
+/// BitPart is returned with Provider set to %X and Provenance[24-31] set to
+/// [0-7].
+///
+/// To avoid revisiting values, the BitPart results are memoized into the
+/// provided map. To avoid unnecessary copying of BitParts, BitParts are
+/// constructed in-place in the \c BPS map. Because of this \c BPS needs to
+/// store BitParts objects, not pointers. As we need the concept of a nullptr
+/// BitParts (Value has been analyzed and the analysis failed), we use an
+/// Optional type instead to provide the same functionality.
+///
+/// Because we pass around references into \c BPS, we must use a container that
+/// does not invalidate internal references (std::map instead of DenseMap).
+///
+static const Optional<BitPart> &
+collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
+                std::map<Value *, Optional<BitPart>> &BPS) {
+  auto I = BPS.find(V);
+  if (I != BPS.end())
+    return I->second;
+
+  auto &Result = BPS[V] = None;
+  auto BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+
+  if (Instruction *I = dyn_cast<Instruction>(V)) {
+    // If this is an or instruction, it may be an inner node of the bswap.
+    if (I->getOpcode() == Instruction::Or) {
+      auto &A = collectBitParts(I->getOperand(0), MatchBSwaps,
+                                MatchBitReversals, BPS);
+      auto &B = collectBitParts(I->getOperand(1), MatchBSwaps,
+                                MatchBitReversals, BPS);
+      if (!A || !B)
+        return Result;
+
+      // Try and merge the two together.
+      if (!A->Provider || A->Provider != B->Provider)
+        return Result;
+
+      Result = BitPart(A->Provider, BitWidth);
+      for (unsigned i = 0; i < A->Provenance.size(); ++i) {
+        if (A->Provenance[i] != BitPart::Unset &&
+            B->Provenance[i] != BitPart::Unset &&
+            A->Provenance[i] != B->Provenance[i])
+          return Result = None;
+
+        if (A->Provenance[i] == BitPart::Unset)
+          Result->Provenance[i] = B->Provenance[i];
+        else
+          Result->Provenance[i] = A->Provenance[i];
+      }
+
+      return Result;
+    }
+
+    // If this is a logical shift by a constant, recurse then shift the result.
+    if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
+      unsigned BitShift =
+          cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
+      // Ensure the shift amount is defined.
+      if (BitShift > BitWidth)
+        return Result;
+
+      auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
+                                  MatchBitReversals, BPS);
+      if (!Res)
+        return Result;
+      Result = Res;
+
+      // Perform the "shift" on BitProvenance.
+      auto &P = Result->Provenance;
+      if (I->getOpcode() == Instruction::Shl) {
+        P.erase(std::prev(P.end(), BitShift), P.end());
+        P.insert(P.begin(), BitShift, BitPart::Unset);
+      } else {
+        P.erase(P.begin(), std::next(P.begin(), BitShift));
+        P.insert(P.end(), BitShift, BitPart::Unset);
+      }
+
+      return Result;
+    }
+
+    // If this is a logical 'and' with a mask that clears bits, recurse then
+    // unset the appropriate bits.
+    if (I->getOpcode() == Instruction::And &&
+        isa<ConstantInt>(I->getOperand(1))) {
+      APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1);
+      const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
+
+      // Check that the mask allows a multiple of 8 bits for a bswap, for an
+      // early exit.
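// (Editorial note, not part of the patch: countPopulation() gives the number
// of bits the mask keeps; e.g. AndMask == 0xFF00 keeps 8 bits and can still
// feed a byte swap, while AndMask == 0xF0 keeps 4, which only a bit reversal
// could use -- hence the early exit below when only bswaps are matched.)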
+ unsigned NumMaskedBits = AndMask.countPopulation(); + if (!MatchBitReversals && NumMaskedBits % 8 != 0) + return Result; + + auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, + MatchBitReversals, BPS); + if (!Res) + return Result; + Result = Res; + + for (unsigned i = 0; i < BitWidth; ++i, Bit <<= 1) + // If the AndMask is zero for this bit, clear the bit. + if ((AndMask & Bit) == 0) + Result->Provenance[i] = BitPart::Unset; + + return Result; + } + } + + // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be + // the input value to the bswap/bitreverse. + Result = BitPart(V, BitWidth); + for (unsigned i = 0; i < BitWidth; ++i) + Result->Provenance[i] = i; + return Result; +} + +static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To, + unsigned BitWidth) { + if (From % 8 != To % 8) + return false; + // Convert from bit indices to byte indices and check for a byte reversal. + From >>= 3; + To >>= 3; + BitWidth >>= 3; + return From == BitWidth - To - 1; +} + +static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, + unsigned BitWidth) { + return From == BitWidth - To - 1; +} + +/// Given an OR instruction, check to see if this is a bitreverse +/// idiom. If so, insert the new intrinsic and return true. +bool llvm::recognizeBitReverseOrBSwapIdiom( + Instruction *I, bool MatchBSwaps, bool MatchBitReversals, + SmallVectorImpl &InsertedInsts) { + if (Operator::getOpcode(I) != Instruction::Or) + return false; + if (!MatchBSwaps && !MatchBitReversals) + return false; + IntegerType *ITy = dyn_cast(I->getType()); + if (!ITy || ITy->getBitWidth() > 128) + return false; // Can't do vectors or integers > 128 bits. + unsigned BW = ITy->getBitWidth(); + + // Try to find all the pieces corresponding to the bswap. + std::map> BPS; + auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS); + if (!Res) + return false; + auto &BitProvenance = Res->Provenance; + + // Now, is the bit permutation correct for a bswap or a bitreverse? We can + // only byteswap values with an even number of bytes. + bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true; + for (unsigned i = 0; i < BW; ++i) { + OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW); + OKForBitReverse &= + bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW); + } + + Intrinsic::ID Intrin; + if (OKForBSwap && MatchBSwaps) + Intrin = Intrinsic::bswap; + else if (OKForBitReverse && MatchBitReversals) + Intrin = Intrinsic::bitreverse; + else + return false; + + Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, ITy); + InsertedInsts.push_back(CallInst::Create(F, Res->Provider, "rev", I)); + return true; +} diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index dc0744060142..2f3c31128cf0 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -970,15 +970,34 @@ static Value *valueHasFloatPrecision(Value *Val) { return nullptr; } -//===----------------------------------------------------------------------===// -// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' +/// Any floating-point library function that we're trying to simplify will have +/// a signature of the form: fptype foo(fptype param1, fptype param2, ...). +/// CheckDoubleTy indicates that 'fptype' must be 'double'. 
+static bool matchesFPLibFunctionSignature(const Function *F, unsigned NumParams, + bool CheckDoubleTy) { + FunctionType *FT = F->getFunctionType(); + if (FT->getNumParams() != NumParams) + return false; -Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, - bool CheckRetType) { + // The return type must match what we're looking for. + Type *RetTy = FT->getReturnType(); + if (CheckDoubleTy ? !RetTy->isDoubleTy() : !RetTy->isFloatingPointTy()) + return false; + + // Each parameter must match the return type, and therefore, match every other + // parameter too. + for (const Type *ParamTy : FT->params()) + if (ParamTy != RetTy) + return false; + + return true; +} + +/// Shrink double -> float for unary functions like 'floor'. +static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, + bool CheckRetType) { Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || - !FT->getParamType(0)->isDoubleTy()) + if (!matchesFPLibFunctionSignature(Callee, 1, true)) return nullptr; if (CheckRetType) { @@ -1013,15 +1032,10 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, return B.CreateFPExt(V, B.getDoubleTy()); } -// Double -> Float Shrinking Optimizations for Binary Functions like 'fmin/fmax' -Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { +/// Shrink double -> float for binary functions like 'fmin/fmax'. +static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - // Just make sure this has 2 arguments of the same FP type, which match the - // result type. - if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || - FT->getParamType(0) != FT->getParamType(1) || - !FT->getParamType(0)->isFloatingPointTy()) + if (!matchesFPLibFunctionSignature(Callee, 2, true)) return nullptr; // If this is something like 'fmin((double)floatval1, (double)floatval2)', @@ -1394,12 +1408,21 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - + Value *Ret = nullptr; if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" || Callee->getIntrinsicID() == Intrinsic::sqrt)) Ret = optimizeUnaryDoubleFP(CI, B, true); + // FIXME: Refactor - this check is repeated all over this file and even in the + // preceding call to shrink double -> float. + + // Make sure this has 1 argument of FP type, which matches the result type. 
+ FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isFloatingPointTy()) + return Ret; + if (!CI->hasUnsafeAlgebra()) return Ret; diff --git a/test/CodeGen/AArch64/cxx-tlscc.ll b/test/CodeGen/AArch64/cxx-tlscc.ll index a9ae00c8d270..9996c0d3aba8 100644 --- a/test/CodeGen/AArch64/cxx-tlscc.ll +++ b/test/CodeGen/AArch64/cxx-tlscc.ll @@ -8,6 +8,7 @@ @sg = internal thread_local global %struct.S zeroinitializer, align 1 @__dso_handle = external global i8 @__tls_guard = internal thread_local unnamed_addr global i1 false +@sum1 = internal thread_local global i32 0, align 4 declare %struct.S* @_ZN1SC1Ev(%struct.S* returned) declare %struct.S* @_ZN1SD1Ev(%struct.S* returned) @@ -74,3 +75,29 @@ __tls_init.exit: ; CHECK-NOT: ldp d27, d26 ; CHECK-NOT: ldp d29, d28 ; CHECK-NOT: ldp d31, d30 + +; CHECK-LABEL: _ZTW4sum1 +; CHECK-NOT: stp d31, d30 +; CHECK-NOT: stp d29, d28 +; CHECK-NOT: stp d27, d26 +; CHECK-NOT: stp d25, d24 +; CHECK-NOT: stp d23, d22 +; CHECK-NOT: stp d21, d20 +; CHECK-NOT: stp d19, d18 +; CHECK-NOT: stp d17, d16 +; CHECK-NOT: stp d7, d6 +; CHECK-NOT: stp d5, d4 +; CHECK-NOT: stp d3, d2 +; CHECK-NOT: stp d1, d0 +; CHECK-NOT: stp x20, x19 +; CHECK-NOT: stp x14, x13 +; CHECK-NOT: stp x12, x11 +; CHECK-NOT: stp x10, x9 +; CHECK-NOT: stp x8, x7 +; CHECK-NOT: stp x6, x5 +; CHECK-NOT: stp x4, x3 +; CHECK-NOT: stp x2, x1 +; CHECK: blr +define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind { + ret i32* @sum1 +} diff --git a/test/CodeGen/ARM/cse-flags.ll b/test/CodeGen/ARM/cse-flags.ll new file mode 100644 index 000000000000..c18e2fcb6039 --- /dev/null +++ b/test/CodeGen/ARM/cse-flags.ll @@ -0,0 +1,43 @@ +; RUN: llc -asm-verbose=false < %s | FileCheck %s +; PR26063 + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7--linux-gnueabihf" + +; CHECK: .LBB0_1: +; CHECK-NEXT: bl f{{$}} +; CHECK-NEXT: ldrb r[[T0:[0-9]+]], [r{{[0-9]+}}, #1]!{{$}} +; CHECK-NEXT: cmp r{{[0-9]+}}, #1{{$}} +; CHECK-NEXT: cmpne r[[T0]], #0{{$}} +; CHECK-NEXT: bne .LBB0_1{{$}} +define i8* @h(i8* readonly %a, i32 %b, i32 %c) { +entry: + %0 = load i8, i8* %a, align 1 + %tobool4 = icmp ne i8 %0, 0 + %cmp5 = icmp ne i32 %b, 1 + %1 = and i1 %cmp5, %tobool4 + br i1 %1, label %while.body.preheader, label %while.end + +while.body.preheader: ; preds = %entry + br label %while.body + +while.body: ; preds = %while.body.preheader, %while.body + %a.addr.06 = phi i8* [ %incdec.ptr, %while.body ], [ %a, %while.body.preheader ] + %call = tail call i32 bitcast (i32 (...)* @f to i32 ()*)() + %incdec.ptr = getelementptr inbounds i8, i8* %a.addr.06, i32 1 + %2 = load i8, i8* %incdec.ptr, align 1 + %tobool = icmp ne i8 %2, 0 + %cmp = icmp ne i32 %call, 1 + %3 = and i1 %cmp, %tobool + br i1 %3, label %while.body, label %while.end.loopexit + +while.end.loopexit: ; preds = %while.body + %incdec.ptr.lcssa = phi i8* [ %incdec.ptr, %while.body ] + br label %while.end + +while.end: ; preds = %while.end.loopexit, %entry + %a.addr.0.lcssa = phi i8* [ %a, %entry ], [ %incdec.ptr.lcssa, %while.end.loopexit ] + ret i8* %a.addr.0.lcssa +} + +declare i32 @f(...) 
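The recognizer added to Local.cpp above reduces the whole match to one table check: collectBitParts() computes, for every result bit To, the source bit Provenance[To] that feeds it, and the idiom fires only if that table is exactly a byte mirror (bswap) or a bit mirror (bitreverse). The following is a small self-contained sketch of that final validation step, with invented names and a hand-built provenance table for an i32 bswap; it is an illustration only, not code from the patch:

  // correctForBSwap/correctForBitReverse mirror the two predicates in the
  // Local.cpp hunk above: a bswap keeps a bit's position within its byte but
  // mirrors the bytes; a bitreverse mirrors the bit index outright.
  #include <cassert>
  #include <vector>

  static bool correctForBSwap(unsigned From, unsigned To, unsigned BitWidth) {
    if (From % 8 != To % 8) // Bit must keep its position within the byte.
      return false;
    return From / 8 == BitWidth / 8 - To / 8 - 1; // Bytes are mirrored.
  }

  static bool correctForBitReverse(unsigned From, unsigned To,
                                   unsigned BitWidth) {
    return From == BitWidth - To - 1; // Bits are mirrored.
  }

  int main() {
    const unsigned BW = 32;
    // Provenance table of a plain i32 bswap: result bit 0 comes from source
    // bit 24, result bit 8 from bit 16, and so on.
    std::vector<unsigned> Provenance(BW);
    for (unsigned To = 0; To < BW; ++To)
      Provenance[To] = (BW - 8 - (To / 8) * 8) + To % 8;

    for (unsigned To = 0; To < BW; ++To) {
      assert(correctForBSwap(Provenance[To], To, BW));
      assert(!correctForBitReverse(Provenance[To], To, BW));
    }
    return 0;
  }

Validating the permutation after the fact keeps collectBitParts() itself simple: it only has to track where each bit comes from, not which transform the mapping happens to form.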
diff --git a/test/CodeGen/ARM/cxx-tlscc.ll b/test/CodeGen/ARM/cxx-tlscc.ll index 7b776d4b8e88..11173bbb1978 100644 --- a/test/CodeGen/ARM/cxx-tlscc.ll +++ b/test/CodeGen/ARM/cxx-tlscc.ll @@ -8,6 +8,7 @@ @sg = internal thread_local global %struct.S zeroinitializer, align 1 @__dso_handle = external global i8 @__tls_guard = internal thread_local unnamed_addr global i1 false +@sum1 = internal thread_local global i32 0, align 4 declare %struct.S* @_ZN1SC1Ev(%struct.S* returned) declare %struct.S* @_ZN1SD1Ev(%struct.S* returned) @@ -44,3 +45,13 @@ __tls_init.exit: ; CHECK-NOT: pop {r9, r12} ; CHECK-NOT: pop {r1, r2, r3, r4, r7, pc} ; CHECK: pop {lr} + +; CHECK-LABEL: _ZTW4sum1 +; CHECK-NOT: push {r1, r2, r3, r4, r7, lr} +; CHECK-NOT: push {r9, r12} +; CHECK-NOT: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31} +; CHECK-NOT: vpush {d0, d1, d2, d3, d4, d5, d6, d7} +; CHECK: blx +define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind { + ret i32* @sum1 +} diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll index 66743f3e9d5e..46fef7629cc4 100644 --- a/test/CodeGen/ARM/memfunc.ll +++ b/test/CodeGen/ARM/memfunc.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -mtriple=armv7-apple-ios -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-IOS -; RUN: llc < %s -mtriple=thumbv7m-none-macho -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-DARWIN -; RUN: llc < %s -mtriple=arm-none-eabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI -; RUN: llc < %s -mtriple=arm-none-eabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI -; RUN: llc < %s -mtriple=arm-none-androideabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI -; RUN: llc < %s -mtriple=arm-none-gnueabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI -; RUN: llc < %s -mtriple=arm-none-gnueabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI +; RUN: llc < %s -mtriple=armv7-apple-ios -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-IOS --check-prefix=CHECK +; RUN: llc < %s -mtriple=thumbv7m-none-macho -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-DARWIN --check-prefix=CHECK +; RUN: llc < %s -mtriple=arm-none-eabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK +; RUN: llc < %s -mtriple=arm-none-eabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK +; RUN: llc < %s -mtriple=arm-none-androideabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK +; RUN: llc < %s -mtriple=arm-none-gnueabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI --check-prefix=CHECK +; RUN: llc < %s -mtriple=arm-none-gnueabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI --check-prefix=CHECK define void @f1(i8* %dest, i8* %src) { entry: @@ -402,8 +402,8 @@ entry: ; CHECK: arr1: ; CHECK-IOS: .align 3 ; CHECK-DARWIN: .align 2 -; CHECK-EABI: .align 2 -; CHECK-GNUEABI: .align 2 +; CHECK-EABI-NOT: .align +; CHECK-GNUEABI-NOT: .align ; CHECK: arr2: ; CHECK: {{\.section.+foo,bar}} ; CHECK-NOT: .align diff --git a/test/CodeGen/X86/2014-05-30-CombineAddNSW.ll b/test/CodeGen/X86/2014-05-30-CombineAddNSW.ll deleted file mode 100644 index 4580795880ab..000000000000 --- a/test/CodeGen/X86/2014-05-30-CombineAddNSW.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s -; CHECK: addl - -; The two additions are the same , but have different flags. 
-; In theory this code should never be generated by the frontend, but this -; tries to test that two identical instructions with two different flags -; actually generate two different nodes. -; -; Normally the combiner would see this condition without the flags -; and optimize the result of the sub into a register clear -; (the final result would be 0). With the different flags though the combiner -; needs to keep the add + sub nodes, because the two nodes result as different -; nodes and so cannot assume that the subtraction of the two nodes -; generates 0 as result -define i32 @foo(i32 %a, i32 %b) { - %1 = add i32 %a, %b - %2 = add nsw i32 %a, %b - %3 = sub i32 %1, %2 - ret i32 %3 -} diff --git a/test/CodeGen/X86/cxx_tlscc64.ll b/test/CodeGen/X86/cxx_tlscc64.ll index 70fe501040bf..6c8e45e42d15 100644 --- a/test/CodeGen/X86/cxx_tlscc64.ll +++ b/test/CodeGen/X86/cxx_tlscc64.ll @@ -4,11 +4,13 @@ ; tricks similar to AArch64 fast TLS calling convention (r255821). ; Applying tricks on x86-64 similar to r255821. ; RUN: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -O0 | FileCheck %s --check-prefix=CHECK-O0 %struct.S = type { i8 } @sg = internal thread_local global %struct.S zeroinitializer, align 1 @__dso_handle = external global i8 @__tls_guard = internal thread_local unnamed_addr global i1 false +@sum1 = internal thread_local global i32 0, align 4 declare void @_ZN1SC1Ev(%struct.S*) declare void @_ZN1SD1Ev(%struct.S*) @@ -50,3 +52,28 @@ init.i: __tls_init.exit: ret %struct.S* @sg } + +; CHECK-LABEL: _ZTW4sum1 +; CHECK-NOT: pushq %r11 +; CHECK-NOT: pushq %r10 +; CHECK-NOT: pushq %r9 +; CHECK-NOT: pushq %r8 +; CHECK-NOT: pushq %rsi +; CHECK-NOT: pushq %rdx +; CHECK-NOT: pushq %rcx +; CHECK-NOT: pushq %rbx +; CHECK: callq +define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind { + ret i32* @sum1 +} + +; Make sure at O0 we don't overwrite RBP. +; CHECK-O0-LABEL: _ZTW4sum2 +; CHECK-O0: pushq %rbp +; CHECK-O0: movq %rsp, %rbp +; CHECK-O0-NOT: movq %r{{.*}}, (%rbp) +define cxx_fast_tlscc i32* @_ZTW4sum2() #0 { + ret i32* @sum1 +} + +attributes #0 = { nounwind "no-frame-pointer-elim"="true" } diff --git a/test/CodeGen/X86/x86-shrink-wrap-unwind.ll b/test/CodeGen/X86/x86-shrink-wrap-unwind.ll index 7c00f407b1e0..eb87f7101d7c 100644 --- a/test/CodeGen/X86/x86-shrink-wrap-unwind.ll +++ b/test/CodeGen/X86/x86-shrink-wrap-unwind.ll @@ -1,11 +1,5 @@ ; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK ; -; This test checks that we do not use shrink-wrapping when -; the function does not have any frame pointer and may unwind. -; This is a workaround for a limitation in the emission of -; the CFI directives, that are not correct in such case. -; PR25614 -; ; Note: This test cannot be merged with the shrink-wrapping tests ; because the booleans set on the command line take precedence on ; the target logic that disable shrink-wrapping. @@ -13,6 +7,12 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "x86_64-apple-macosx" +; This test checks that we do not use shrink-wrapping when +; the function does not have any frame pointer and may unwind. +; This is a workaround for a limitation in the emission of +; the CFI directives, that are not correct in such case. +; PR25614 +; ; No shrink-wrapping should occur here, until the CFI information are fixed. 
; CHECK-LABEL: framelessUnwind:
;
@@ -151,3 +151,74 @@ false:
 }
 
 attributes #2 = { "no-frame-pointer-elim"="false" nounwind }
+
+
+; Check that we generate correct code for segmented stack.
+; We used to emit the code at the entry point of the function
+; instead of just before the prologue.
+; For now, shrink-wrapping is disabled on segmented stack functions: PR26107.
+;
+; CHECK-LABEL: segmentedStack:
+; CHECK: cmpq
+; CHECK-NEXT: ja [[ENTRY_LABEL:LBB[0-9_]+]]
+;
+; CHECK: callq ___morestack
+; CHECK-NEXT: retq
+;
+; CHECK: [[ENTRY_LABEL]]:
+; Prologue
+; CHECK: push
+;
+; In PR26107, we used to drop these two basic blocks, because
+; the segmentedStack entry block was jumping directly to
+; the place where the prologue is actually needed, which is
+; the call to memcmp.
+; Then, those two basic blocks did not have any predecessors
+; anymore and were removed.
+;
+; Check if vk1 is null
+; CHECK: testq %rdi, %rdi
+; CHECK-NEXT: je [[STRINGS_EQUAL:LBB[0-9_]+]]
+;
+; Check if vk2 is null
+; CHECK: testq %rsi, %rsi
+; CHECK-NEXT: je [[STRINGS_EQUAL]]
+;
+; CHECK: [[STRINGS_EQUAL]]
+; CHECK-NEXT: popq
+define zeroext i1 @segmentedStack(i8* readonly %vk1, i8* readonly %vk2, i64 %key_size) #5 {
+entry:
+  %cmp.i = icmp eq i8* %vk1, null
+  %cmp1.i = icmp eq i8* %vk2, null
+  %brmerge.i = or i1 %cmp.i, %cmp1.i
+  %cmp1.mux.i = and i1 %cmp.i, %cmp1.i
+  br i1 %brmerge.i, label %__go_ptr_strings_equal.exit, label %if.end4.i
+
+if.end4.i:                                        ; preds = %entry
+  %tmp = getelementptr inbounds i8, i8* %vk1, i64 8
+  %tmp1 = bitcast i8* %tmp to i64*
+  %tmp2 = load i64, i64* %tmp1, align 8
+  %tmp3 = getelementptr inbounds i8, i8* %vk2, i64 8
+  %tmp4 = bitcast i8* %tmp3 to i64*
+  %tmp5 = load i64, i64* %tmp4, align 8
+  %cmp.i.i = icmp eq i64 %tmp2, %tmp5
+  br i1 %cmp.i.i, label %land.rhs.i.i, label %__go_ptr_strings_equal.exit
+
+land.rhs.i.i:                                     ; preds = %if.end4.i
+  %tmp6 = bitcast i8* %vk2 to i8**
+  %tmp7 = load i8*, i8** %tmp6, align 8
+  %tmp8 = bitcast i8* %vk1 to i8**
+  %tmp9 = load i8*, i8** %tmp8, align 8
+  %call.i.i = tail call i32 @memcmp(i8* %tmp9, i8* %tmp7, i64 %tmp2) #5
+  %cmp4.i.i = icmp eq i32 %call.i.i, 0
+  br label %__go_ptr_strings_equal.exit
+
+__go_ptr_strings_equal.exit:                      ; preds = %land.rhs.i.i, %if.end4.i, %entry
+  %retval.0.i = phi i1 [ %cmp1.mux.i, %entry ], [ false, %if.end4.i ], [ %cmp4.i.i, %land.rhs.i.i ]
+  ret i1 %retval.0.i
+}
+
+; Function Attrs: nounwind readonly
+declare i32 @memcmp(i8* nocapture, i8* nocapture, i64) #5
+
+attributes #5 = { nounwind readonly ssp uwtable "split-stack" }
diff --git a/test/DebugInfo/ARM/PR26163.ll b/test/DebugInfo/ARM/PR26163.ll
new file mode 100644
index 000000000000..9ab0e35805c1
--- /dev/null
+++ b/test/DebugInfo/ARM/PR26163.ll
@@ -0,0 +1,107 @@
+; RUN: llc -filetype=obj -o - < %s | llvm-dwarfdump - | FileCheck %s
+;
+; Checks that we're creating two ranges, one that terminates immediately
+; and one that spans the rest of the function. This isn't necessarily the
+; best thing to do here (and also not necessarily correct, since the first
+; one has a bit_piece), but it is what is currently being emitted; any
+; change here needs to be intentional, so the test is very specific.
+;
+; CHECK: .debug_loc contents:
+; CHECK: 0x00000000: Beginning address offset: 0x0000000000000004
+; CHECK: Ending address offset: 0x0000000000000004
+; CHECK: Location description: 10 00 9f
+; CHECK: Beginning address offset: 0x0000000000000004
+; CHECK: Ending address offset: 0x0000000000000014
+; CHECK: Location description: 10 00 9f
+
+; Created from the following test case (PR26163) with
+; clang -cc1 -triple armv4t--freebsd11.0-gnueabi -emit-obj -debug-info-kind=standalone -O2 -x c test.c
+;
+; typedef unsigned int size_t;
+; struct timeval {
+;   long long tv_sec;
+;   int tv_usec;
+; };
+;
+; void *memset(void *, int, size_t);
+; void foo(void);
+;
+; static void
+; bar(int value)
+; {
+;   struct timeval lifetime;
+;
+;   memset(&lifetime, 0, sizeof(struct timeval));
+;   lifetime.tv_sec = value;
+;
+;   foo();
+; }
+;
+; int
+; parse_config_file(void)
+; {
+;   int value;
+;
+;   bar(value);
+;   return (0);
+; }
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv4t--freebsd11.0-gnueabi"
+
+%struct.timeval = type { i64, i32 }
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
+
+declare void @foo()
+
+define i32 @parse_config_file() !dbg !4 {
+entry:
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !15, metadata !26), !dbg !27
+  tail call void @llvm.dbg.declare(metadata %struct.timeval* undef, metadata !16, metadata !26), !dbg !29
+  tail call void @llvm.dbg.value(metadata i64 0, i64 0, metadata !16, metadata !30), !dbg !29
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !16, metadata !31), !dbg !29
+  tail call void @foo() #3, !dbg !32
+  ret i32 0, !dbg !33
+}
+
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!22, !23, !24}
+!llvm.ident = !{!25}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (https://github.com/llvm-mirror/clang 89dda3855cda574f355e6defa1d77bdae5053994) (llvm/trunk 257891)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "<stdin>", directory: "/home/ubuntu/bugs")
+!2 = !{}
+!3 = !{!4, !11}
+!4 = distinct !DISubprogram(name: "parse_config_file", scope: !5, file: !5, line: 22, type: !6, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: true, variables: !9)
+!5 = !DIFile(filename: "test.c", directory: "/home/ubuntu/bugs")
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8}
+!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{!10}
+!10 = !DILocalVariable(name: "value", scope: !4, file: !5, line: 24, type: !8)
+!11 = distinct !DISubprogram(name: "bar", scope: !5, file: !5, line: 11, type: !12, isLocal: true, isDefinition: true, scopeLine: 12, flags: DIFlagPrototyped, isOptimized: true, variables: !14)
+!12 = !DISubroutineType(types: !13)
+!13 = !{null, !8}
+!14 = !{!15, !16}
+!15 = !DILocalVariable(name: "value", arg: 1, scope: !11, file: !5, line: 11, type: !8)
+!16 = !DILocalVariable(name: "lifetime", scope: !11, file: !5, line: 13, type: !17)
+!17 = !DICompositeType(tag: DW_TAG_structure_type, name: "timeval", file: !5, line: 2, size: 128, align: 64, elements: !18)
+!18 = !{!19, !21}
+!19 = !DIDerivedType(tag: DW_TAG_member, name: "tv_sec", scope: !17, file: !5, line: 3, baseType: !20, size: 64, align: 64)
+!20 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!21 = !DIDerivedType(tag: DW_TAG_member, name: "tv_usec", scope: !17,
file: !5, line: 4, baseType: !8, size: 32, align: 32, offset: 64) +!22 = !{i32 2, !"Debug Info Version", i32 3} +!23 = !{i32 1, !"wchar_size", i32 4} +!24 = !{i32 1, !"min_enum_size", i32 4} +!25 = !{!"clang version 3.9.0 (https://github.com/llvm-mirror/clang 89dda3855cda574f355e6defa1d77bdae5053994) (llvm/trunk 257891)"} +!26 = !DIExpression() +!27 = !DILocation(line: 11, scope: !11, inlinedAt: !28) +!28 = distinct !DILocation(line: 26, scope: !4) +!29 = !DILocation(line: 13, scope: !11, inlinedAt: !28) +!30 = !DIExpression(DW_OP_bit_piece, 0, 64) +!31 = !DIExpression(DW_OP_bit_piece, 0, 32) +!32 = !DILocation(line: 18, scope: !11, inlinedAt: !28) +!33 = !DILocation(line: 27, scope: !4) diff --git a/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll b/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll index 7df88b1ec5e0..b91a0438a679 100644 --- a/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll +++ b/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll @@ -1,5 +1,5 @@ ; RUN: %lli -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32 declare i32 @FB() diff --git a/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll b/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll index d35418b19c7f..94938a86cba4 100644 --- a/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll +++ b/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll @@ -1,5 +1,5 @@ ; RUN: %lli -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32 declare i32 @FB() diff --git a/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll b/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll index 0d1a1ec6871a..72449f3af3ad 100644 --- a/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll @@ -1,5 +1,5 @@ ; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32 define i32 @bar() nounwind { ret i32 0 diff --git a/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll b/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll index 31ed7523db43..31271b594c02 100644 --- a/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll @@ -1,5 +1,5 @@ ; RUN: %lli -remote-mcjit -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s -; XFAIL: win32 +; XFAIL: mingw32,win32 ; This test should fail until remote symbol resolution is supported. define i32 @main() nounwind { diff --git a/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll index bbeab10cd788..9d1abbcf847c 100644 --- a/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll @@ -1,5 +1,5 @@ ; RUN: %lli -remote-mcjit -O0 -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s -; XFAIL: win32 +; XFAIL: mingw32,win32 ; The intention of this test is to verify that symbols mapped to COMMON in ELF ; work as expected. 
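For reference, the matchesFPLibFunctionSignature() helper that the SimplifyLibCalls.cpp hunk earlier factors out encodes a single rule: NumParams parameters, every one identical to the return type, with the return type either pinned to double or allowed to be any floating-point type. A self-contained sketch of that rule using a toy type model (hypothetical names, not LLVM's Type/FunctionType API):

  #include <vector>

  enum class Ty { Float, Double, LongDouble, Other };

  struct FnSig {
    Ty Ret;
    std::vector<Ty> Params;
  };

  static bool isFloatingPoint(Ty T) {
    return T == Ty::Float || T == Ty::Double || T == Ty::LongDouble;
  }

  // N parameters, all matching the return type; the return type must be
  // exactly double when CheckDoubleTy is set, otherwise any FP type.
  static bool matchesFPLibSig(const FnSig &S, unsigned NumParams,
                              bool CheckDoubleTy) {
    if (S.Params.size() != NumParams)
      return false;
    if (CheckDoubleTy ? S.Ret != Ty::Double : !isFloatingPoint(S.Ret))
      return false;
    for (Ty P : S.Params)
      if (P != S.Ret)
        return false;
    return true;
  }

  int main() {
    FnSig Floor{Ty::Double, {Ty::Double}};          // double floor(double)
    FnSig FMinF{Ty::Float, {Ty::Float, Ty::Float}}; // float fminf(float, float)
    bool OK = matchesFPLibSig(Floor, 1, true) &&    // accepted
              !matchesFPLibSig(FMinF, 2, true);     // rejected: not double
    return OK ? 0 : 1;
  }

Centralizing the check is what lets optimizeUnaryDoubleFP and optimizeBinaryDoubleFP above drop their hand-rolled, slightly divergent signature tests.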
diff --git a/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll index 0aa19b244c04..afa8a95f454d 100644 --- a/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll @@ -1,5 +1,5 @@ ; RUN: %lli -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s -; XFAIL: win32 +; XFAIL: mingw32,win32 ; Check that a variable is always aligned as specified. diff --git a/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll index 13bac29a3628..f9961593c7b9 100644 --- a/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll @@ -1,5 +1,5 @@ ; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32 define double @test(double* %DP, double %Arg) nounwind { %D = load double, double* %DP ; [#uses=1] diff --git a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll index 5d5480e9d459..329dc5c83950 100644 --- a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll @@ -1,5 +1,5 @@ ; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32 @count = global i32 1, align 4 diff --git a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll index ef74fa02e6a9..44557ea399b5 100644 --- a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll +++ b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll @@ -1,6 +1,6 @@ ; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \ ; RUN: -relocation-model=pic -code-model=small %s > /dev/null -; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, win32 +; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, mingw32, win32 @count = global i32 1, align 4 diff --git a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll index c2260fc2f1ff..a249c2f097e1 100644 --- a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll +++ b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll @@ -1,5 +1,5 @@ ; RUN: %lli -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s -; XFAIL: win32 +; XFAIL: mingw32,win32 @.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1 @ptr = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), align 4 diff --git a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll index 2a45472b25a1..281705383339 100644 --- a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll +++ b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll @@ -1,6 +1,6 @@ ; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \ ; RUN: -O0 -relocation-model=pic -code-model=small %s -; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, win32 +; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, mingw32, win32 @.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1 @ptr = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* 
@.str, i32 0, i32 0), align 4 diff --git a/test/ExecutionEngine/OrcMCJIT/remote/cross-module-a.ll b/test/ExecutionEngine/OrcMCJIT/remote/cross-module-a.ll index 249aad2d4b48..6fbb2bc3c4bd 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/cross-module-a.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/cross-module-a.ll @@ -1,5 +1,5 @@ ; RUN: %lli -jit-kind=orc-mcjit -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32 declare i32 @FB() diff --git a/test/ExecutionEngine/OrcMCJIT/remote/multi-module-a.ll b/test/ExecutionEngine/OrcMCJIT/remote/multi-module-a.ll index 32c58ee6237b..ce094174134b 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/multi-module-a.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/multi-module-a.ll @@ -1,5 +1,5 @@ ; RUN: %lli -jit-kind=orc-mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32 declare i32 @FB() diff --git a/test/ExecutionEngine/OrcMCJIT/remote/simpletest-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/simpletest-remote.ll index aaf3ebc9bc7f..bc477c285515 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/simpletest-remote.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/simpletest-remote.ll @@ -1,5 +1,5 @@ ; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32 define i32 @bar() nounwind { ret i32 0 diff --git a/test/ExecutionEngine/OrcMCJIT/remote/stubs-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/stubs-remote.ll index a0d941049c4a..001a617b97a3 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/stubs-remote.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/stubs-remote.ll @@ -1,5 +1,5 @@ ; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s -; XFAIL: win32 +; XFAIL: mingw32,win32 ; This test should fail until remote symbol resolution is supported. define i32 @main() nounwind { diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-common-symbols-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-common-symbols-remote.ll index 9b4e2469665f..4c4256e45a35 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/test-common-symbols-remote.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/test-common-symbols-remote.ll @@ -1,5 +1,5 @@ ; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -O0 -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s -; XFAIL: win32 +; XFAIL: mingw32,win32 ; The intention of this test is to verify that symbols mapped to COMMON in ELF ; work as expected. diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-data-align-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-data-align-remote.ll index 88a561b613ef..1621501a31a1 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/test-data-align-remote.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/test-data-align-remote.ll @@ -1,5 +1,5 @@ ; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s -; XFAIL: win32 +; XFAIL: mingw32,win32 ; Check that a variable is always aligned as specified. 
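The alignment checks in these tests tie back to the enforceKnownAlignment() change in Local.cpp at the top of this patch: fold the object's own alignment into what is already known, then bump to the preferred alignment only when that is safe. A toy model of that decision logic (an assumed simplification for illustration, not the patch's code):

  #include <algorithm>
  #include <cassert>

  struct Obj {
    unsigned Align;            // Current known alignment in bytes.
    bool IsAlloca;             // Stack allocation vs. global object.
    bool CanIncreaseAlignment; // For globals: safe for the linker to bump.
  };

  // NaturalStackAlign stands in for DataLayout's natural stack alignment.
  static unsigned enforce(Obj &O, unsigned Align, unsigned PrefAlign,
                          unsigned NaturalStackAlign) {
    assert(PrefAlign > Align);
    Align = std::max(O.Align, Align); // Fold in what the object already knows.
    if (PrefAlign <= Align)
      return Align;
    if (O.IsAlloca) {
      // Don't force dynamic stack realignment just to raise alignment.
      if (PrefAlign > NaturalStackAlign)
        return Align;
    } else if (!O.CanIncreaseAlignment) {
      return Align;
    }
    O.Align = PrefAlign;
    return PrefAlign;
  }

  int main() {
    Obj A{4, /*IsAlloca=*/true, false};
    // Bumping a 4-byte-aligned alloca to 16 succeeds under a 16-byte stack.
    return (enforce(A, 4, 16, 16) == 16 && A.Align == 16) ? 0 : 1;
  }

The key design point the patch makes is that the "can we bump it" question for globals is answered by one query (canIncreaseAlignment) instead of ad-hoc section and linkage checks at the call site.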
diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-fp-no-external-funcs-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-fp-no-external-funcs-remote.ll index 484541ab4807..6ff8704da1f9 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/test-fp-no-external-funcs-remote.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/test-fp-no-external-funcs-remote.ll @@ -1,5 +1,5 @@ ; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32 define double @test(double* %DP, double %Arg) nounwind { %D = load double, double* %DP ; [#uses=1] diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-remote.ll index adc3e944b639..a7c8bfef938f 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-remote.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-remote.ll @@ -1,5 +1,5 @@ ; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null -; XFAIL: win32 +; XFAIL: mingw32,win32 @count = global i32 1, align 4 diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-sm-pic.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-sm-pic.ll index 8ab3fd591388..a028df674648 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-sm-pic.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-sm-pic.ll @@ -1,6 +1,6 @@ ; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \ ; RUN: -relocation-model=pic -code-model=small %s > /dev/null -; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, win32 +; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, mingw32, win32 @count = global i32 1, align 4 diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-remote.ll index a47c801e799b..d369d2b38498 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-remote.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-remote.ll @@ -1,5 +1,5 @@ ; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s -; XFAIL: win32 +; XFAIL: mingw32,win32 @.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1 @ptr = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), align 4 diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-sm-pic.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-sm-pic.ll index 210ac6f6ed1c..e918dabe1772 100644 --- a/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-sm-pic.ll +++ b/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-sm-pic.ll @@ -1,6 +1,6 @@ ; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \ ; RUN: -O0 -relocation-model=pic -code-model=small %s -; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, win32 +; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, mingw32, win32 @.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1 @ptr = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), align 4 diff --git a/test/MC/AArch64/inst-directive.s b/test/MC/AArch64/inst-directive.s index 3bb620f689d1..7fd5200b9e57 100644 --- a/test/MC/AArch64/inst-directive.s +++ b/test/MC/AArch64/inst-directive.s @@ -1,7 +1,14 @@ // RUN: llvm-mc %s -triple=aarch64-none-linux-gnu -filetype=asm -o - \ // RUN: | FileCheck 
%s --check-prefix=CHECK-ASM -// RUN: llvm-mc %s -triple=aarch64-none-linux-gnu -filetype=obj -o - \ -// RUN: | llvm-readobj -s -sd | FileCheck %s --check-prefix=CHECK-OBJ +// RUN: llvm-mc %s -triple=aarch64-none-linux-gnu -filetype=obj -o %t +// RUN: llvm-readobj -s -sd %t | FileCheck %s --check-prefix=CHECK-OBJ +// RUN: llvm-objdump -t %t | FileCheck %s --check-prefix=CHECK-SYMS + +// RUN: llvm-mc %s -triple=aarch64_be-none-linux-gnu -filetype=asm -o - \ +// RUN: | FileCheck %s --check-prefix=CHECK-ASM +// RUN: llvm-mc %s -triple=aarch64_be-none-linux-gnu -filetype=obj -o %t +// RUN: llvm-readobj -s -sd %t | FileCheck %s --check-prefix=CHECK-OBJ +// RUN: llvm-objdump -t %t | FileCheck %s --check-prefix=CHECK-SYMS .section .inst.aarch64_inst @@ -22,3 +29,7 @@ aarch64_inst: // CHECK-OBJ: SectionData ( // CHECK-OBJ-NEXT: 0000: 2040105E // CHECK-OBJ-NEXT: ) + +// CHECK-SYMS-NOT: 0000000000000000 .inst.aarch64_inst 00000000 $d +// CHECK-SYMS: 0000000000000000 .inst.aarch64_inst 00000000 $x +// CHECK-SYMS-NOT: 0000000000000000 .inst.aarch64_inst 00000000 $d diff --git a/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll b/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll new file mode 100644 index 000000000000..36440da21626 --- /dev/null +++ b/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll @@ -0,0 +1,37 @@ +; RUN: opt -S -loop-unroll -codegenprepare < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7--linux-gnueabihf" + +; CHECK-LABEL: @f +define i32 @f(i32 %a) #0 { +; CHECK: call i32 @llvm.bitreverse.i32 +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret i32 %or + +for.body: ; preds = %for.body, %entry + %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %b.07 = phi i32 [ 0, %entry ], [ %or, %for.body ] + %shr = lshr i32 %a, %i.08 + %and = and i32 %shr, 1 + %sub = sub nuw nsw i32 31, %i.08 + %shl = shl i32 %and, %sub + %or = or i32 %shl, %b.07 + %inc = add nuw nsw i32 %i.08, 1 + %exitcond = icmp eq i32 %inc, 32 + br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !3 +} + +attributes #0 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a8" "target-features"="+dsp,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"min_enum_size", i32 4} +!2 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git b7441a0f42c43a8eea9e3e706be187252db747fa)"} +!3 = distinct !{!3, !4} +!4 = !{!"llvm.loop.unroll.full"} diff --git a/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg b/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg new file mode 100644 index 000000000000..98c6700c209d --- /dev/null +++ b/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'ARM' in config.root.targets: + config.unsupported = True + diff --git a/test/Transforms/CodeGenPrepare/bitreverse-hang.ll b/test/Transforms/CodeGenPrepare/bitreverse-hang.ll new file mode 100644 index 000000000000..c81dcc15cae9 --- /dev/null +++ b/test/Transforms/CodeGenPrepare/bitreverse-hang.ll @@ -0,0 +1,53 @@ +; RUN: opt < %s -loop-unroll -codegenprepare -S | FileCheck %s + +; This test is a worst-case scenario for bitreversal/byteswap detection. 
; After loop unrolling (the unrolled loop is unreadably large so it has been kept
+; rolled here), we have a binary tree of OR operands (as bitreversal detection
+; looks straight through shifts):
+;
+; OR
+; | \
+; |  LSHR
+; | /
+; OR
+; | \
+; |  LSHR
+; | /
+; OR
+;
+; This results in exponential runtime. The loop here is 32 iterations which will
+; totally hang if we don't deal with this case cleverly.
+
+@b = common global i32 0, align 4
+
+; CHECK: define i32 @fn1
+define i32 @fn1() #0 {
+entry:
+  %b.promoted = load i32, i32* @b, align 4, !tbaa !2
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %or4 = phi i32 [ %b.promoted, %entry ], [ %or, %for.body ]
+  %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %shr = lshr i32 %or4, 1
+  %or = or i32 %shr, %or4
+  %inc = add nuw nsw i32 %i.03, 1
+  %exitcond = icmp eq i32 %inc, 32
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  store i32 %or, i32* @b, align 4, !tbaa !2
+  ret i32 undef
+}
+
+attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git eb70f4e9cc9a4dc3dd57b032fb858d56b4b64a0e)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
diff --git a/test/Transforms/Inline/inline-funclets.ll b/test/Transforms/Inline/inline-funclets.ll
new file mode 100644
index 000000000000..362e03d36a32
--- /dev/null
+++ b/test/Transforms/Inline/inline-funclets.ll
@@ -0,0 +1,455 @@
+; RUN: opt -inline -S %s | FileCheck %s
+
+declare void @g()
+
+
+;;; Test with a call in a funclet that needs to remain a call
+;;; when inlined because the funclet doesn't unwind to caller.
+;;; CHECK-LABEL: define void @test1(
+define void @test1() personality void ()* @g {
+entry:
+; CHECK-NEXT: entry:
+  invoke void @test1_inlinee()
+    to label %exit unwind label %cleanup
+cleanup:
+  %pad = cleanuppad within none []
+  call void @g() [ "funclet"(token %pad) ]
+  cleanupret from %pad unwind to caller
+exit:
+  ret void
+}
+
+define void @test1_inlinee() alwaysinline personality void ()* @g {
+entry:
+  invoke void @g()
+    to label %exit unwind label %cleanup.inner
+; CHECK-NEXT: invoke void @g()
+; CHECK-NEXT: unwind label %[[cleanup_inner:.+]]
+
+cleanup.inner:
+  %pad.inner = cleanuppad within none []
+  call void @g() [ "funclet"(token %pad.inner) ]
+  cleanupret from %pad.inner unwind label %cleanup.outer
+; CHECK: [[cleanup_inner]]:
+; The call here needs to remain a call because pad.inner has a cleanupret
+; that stays within the inlinee.
+; CHECK-NEXT: %[[pad_inner:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: call void @g() [ "funclet"(token %[[pad_inner]]) ] +; CHECK-NEXT: cleanupret from %[[pad_inner]] unwind label %[[cleanup_outer:.+]] + +cleanup.outer: + %pad.outer = cleanuppad within none [] + call void @g() [ "funclet"(token %pad.outer) ] + cleanupret from %pad.outer unwind to caller +; CHECK: [[cleanup_outer]]: +; The call and cleanupret here need to be redirected to caller cleanup +; CHECK-NEXT: %[[pad_outer:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[pad_outer]]) ] +; CHECK-NEXT: unwind label %cleanup +; CHECK: cleanupret from %[[pad_outer]] unwind label %cleanup{{$}} + +exit: + ret void +} + + + +;;; Test with an "unwind to caller" catchswitch in a parent funclet +;;; that needs to remain "unwind to caller" because the parent +;;; doesn't unwind to caller. +;;; CHECK-LABEL: define void @test2( +define void @test2() personality void ()* @g { +entry: +; CHECK-NEXT: entry: + invoke void @test2_inlinee() + to label %exit unwind label %cleanup +cleanup: + %pad = cleanuppad within none [] + call void @g() [ "funclet"(token %pad) ] + cleanupret from %pad unwind to caller +exit: + ret void +} + +define void @test2_inlinee() alwaysinline personality void ()* @g { +entry: + invoke void @g() + to label %exit unwind label %cleanup1 +; CHECK-NEXT: invoke void @g() +; CHECK-NEXT: unwind label %[[cleanup1:.+]] + +cleanup1: + %outer = cleanuppad within none [] + invoke void @g() [ "funclet"(token %outer) ] + to label %ret1 unwind label %catchswitch +; CHECK: [[cleanup1]]: +; CHECK-NEXT: %[[outer:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[outer]]) ] +; CHECK-NEXT: unwind label %[[catchswitch:.+]] + +catchswitch: + %cs = catchswitch within %outer [label %catch] unwind to caller +; CHECK: [[catchswitch]]: +; The catchswitch here needs to remain "unwind to caller" since %outer +; has a cleanupret that remains within the inlinee. +; CHECK-NEXT: %[[cs:[^ ]+]] = catchswitch within %[[outer]] [label %[[catch:.+]]] unwind to caller + +catch: + %inner = catchpad within %cs [] + call void @g() [ "funclet"(token %inner) ] + catchret from %inner to label %ret1 +; CHECK: [[catch]]: +; The call here needs to remain a call since it too is within %outer +; CHECK: %[[inner:[^ ]+]] = catchpad within %[[cs]] +; CHECK-NEXT: call void @g() [ "funclet"(token %[[inner]]) ] + +ret1: + cleanupret from %outer unwind label %cleanup2 +; CHECK: cleanupret from %[[outer]] unwind label %[[cleanup2:.+]] + +cleanup2: + %later = cleanuppad within none [] + cleanupret from %later unwind to caller +; CHECK: [[cleanup2]]: +; The cleanupret here needs to get redirected to the caller cleanup +; CHECK-NEXT: %[[later:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: cleanupret from %[[later]] unwind label %cleanup{{$}} + +exit: + ret void +} + + +;;; Test with a call in a cleanup that has no definitive unwind +;;; destination, that must be rewritten to an invoke. 
+;;; CHECK-LABEL: define void @test3( +define void @test3() personality void ()* @g { +entry: +; CHECK-NEXT: entry: + invoke void @test3_inlinee() + to label %exit unwind label %cleanup +cleanup: + %pad = cleanuppad within none [] + call void @g() [ "funclet"(token %pad) ] + cleanupret from %pad unwind to caller +exit: + ret void +} + +define void @test3_inlinee() alwaysinline personality void ()* @g { +entry: + invoke void @g() + to label %exit unwind label %cleanup +; CHECK-NEXT: invoke void @g() +; CHECK-NEXT: unwind label %[[cleanup:.+]] + +cleanup: + %pad = cleanuppad within none [] + call void @g() [ "funclet"(token %pad) ] + unreachable +; CHECK: [[cleanup]]: +; The call must be rewritten to an invoke targeting the caller cleanup +; because it may well unwind to there. +; CHECK-NEXT: %[[pad:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[pad]]) ] +; CHECK-NEXT: unwind label %cleanup{{$}} + +exit: + ret void +} + + +;;; Test with a catchswitch in a cleanup that has no definitive +;;; unwind destination, that must be rewritten to unwind to the +;;; inlined invoke's unwind dest +;;; CHECK-LABEL: define void @test4( +define void @test4() personality void ()* @g { +entry: +; CHECK-NEXT: entry: + invoke void @test4_inlinee() + to label %exit unwind label %cleanup +cleanup: + %pad = cleanuppad within none [] + call void @g() [ "funclet"(token %pad) ] + cleanupret from %pad unwind to caller +exit: + ret void +} + +define void @test4_inlinee() alwaysinline personality void ()* @g { +entry: + invoke void @g() + to label %exit unwind label %cleanup +; CHECK-NEXT: invoke void @g() +; CHECK-NEXT: unwind label %[[cleanup:.+]] + +cleanup: + %clean = cleanuppad within none [] + invoke void @g() [ "funclet"(token %clean) ] + to label %unreachable unwind label %dispatch +; CHECK: [[cleanup]]: +; CHECK-NEXT: %[[clean:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[clean]]) ] +; CHECK-NEXT: unwind label %[[dispatch:.+]] + +dispatch: + %cs = catchswitch within %clean [label %catch] unwind to caller +; CHECK: [[dispatch]]: +; The catchswitch must be rewritten to unwind to %cleanup in the caller +; because it may well unwind to there. +; CHECK-NEXT: %[[cs:[^ ]+]] = catchswitch within %[[clean]] [label %[[catch:.+]]] unwind label %cleanup{{$}} + +catch: + catchpad within %cs [] + br label %unreachable +unreachable: + unreachable +exit: + ret void +} + + +;;; Test with multiple levels of nesting, and unwind dests +;;; that need to be inferred from ancestors, descendants, +;;; and cousins. 
+;;; CHECK-LABEL: define void @test5( +define void @test5() personality void ()* @g { +entry: +; CHECK-NEXT: entry: + invoke void @test5_inlinee() + to label %exit unwind label %cleanup +cleanup: + %pad = cleanuppad within none [] + call void @g() [ "funclet"(token %pad) ] + cleanupret from %pad unwind to caller +exit: + ret void +} + +define void @test5_inlinee() alwaysinline personality void ()* @g { +entry: + invoke void @g() + to label %cont unwind label %noinfo.root +; CHECK-NEXT: invoke void @g() +; CHECK-NEXT: to label %[[cont:[^ ]+]] unwind label %[[noinfo_root:.+]] + +noinfo.root: + %noinfo.root.pad = cleanuppad within none [] + call void @g() [ "funclet"(token %noinfo.root.pad) ] + invoke void @g() [ "funclet"(token %noinfo.root.pad) ] + to label %noinfo.root.cont unwind label %noinfo.left +; CHECK: [[noinfo_root]]: +; Nothing under "noinfo.root" has a definitive unwind destination, so +; we must assume all of it may actually unwind, and redirect unwinds +; to the cleanup in the caller. +; CHECK-NEXT: %[[noinfo_root_pad:[^ ]+]] = cleanuppad within none [] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_root_pad]]) ] +; CHECK-NEXT: to label %[[next:[^ ]+]] unwind label %cleanup{{$}} +; CHECK: [[next]]: +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_root_pad]]) ] +; CHECK-NEXT: to label %[[noinfo_root_cont:[^ ]+]] unwind label %[[noinfo_left:.+]] + +noinfo.left: + %noinfo.left.pad = cleanuppad within %noinfo.root.pad [] + invoke void @g() [ "funclet"(token %noinfo.left.pad) ] + to label %unreachable unwind label %noinfo.left.child +; CHECK: [[noinfo_left]]: +; CHECK-NEXT: %[[noinfo_left_pad:[^ ]+]] = cleanuppad within %[[noinfo_root_pad]] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_left_pad]]) ] +; CHECK-NEXT: unwind label %[[noinfo_left_child:.+]] + +noinfo.left.child: + %noinfo.left.child.cs = catchswitch within %noinfo.left.pad [label %noinfo.left.child.catch] unwind to caller +; CHECK: [[noinfo_left_child]]: +; CHECK-NEXT: %[[noinfo_left_child_cs:[^ ]+]] = catchswitch within %[[noinfo_left_pad]] [label %[[noinfo_left_child_catch:[^ ]+]]] unwind label %cleanup{{$}} + +noinfo.left.child.catch: + %noinfo.left.child.pad = catchpad within %noinfo.left.child.cs [] + call void @g() [ "funclet"(token %noinfo.left.child.pad) ] + br label %unreachable +; CHECK: [[noinfo_left_child_catch]]: +; CHECK-NEXT: %[[noinfo_left_child_pad:[^ ]+]] = catchpad within %[[noinfo_left_child_cs]] [] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_left_child_pad]]) ] +; CHECK-NEXT: unwind label %cleanup{{$}} + +noinfo.root.cont: + invoke void @g() [ "funclet"(token %noinfo.root.pad) ] + to label %unreachable unwind label %noinfo.right +; CHECK: [[noinfo_root_cont]]: +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_root_pad]]) ] +; CHECK-NEXT: unwind label %[[noinfo_right:.+]] + +noinfo.right: + %noinfo.right.cs = catchswitch within %noinfo.root.pad [label %noinfo.right.catch] unwind to caller +; CHECK: [[noinfo_right]]: +; CHECK-NEXT: %[[noinfo_right_cs:[^ ]+]] = catchswitch within %[[noinfo_root_pad]] [label %[[noinfo_right_catch:[^ ]+]]] unwind label %cleanup{{$}} + +noinfo.right.catch: + %noinfo.right.pad = catchpad within %noinfo.right.cs [] + invoke void @g() [ "funclet"(token %noinfo.right.pad) ] + to label %unreachable unwind label %noinfo.right.child +; CHECK: [[noinfo_right_catch]]: +; CHECK-NEXT: %[[noinfo_right_pad:[^ ]+]] = catchpad within %[[noinfo_right_cs]] +; CHECK-NEXT: invoke void @g() [ "funclet"(token 
%[[noinfo_right_pad]]) ]
+; CHECK-NEXT: unwind label %[[noinfo_right_child:.+]]
+
+noinfo.right.child:
+  %noinfo.right.child.pad = cleanuppad within %noinfo.right.pad []
+  call void @g() [ "funclet"(token %noinfo.right.child.pad) ]
+  br label %unreachable
+; CHECK: [[noinfo_right_child]]:
+; CHECK-NEXT: %[[noinfo_right_child_pad:[^ ]+]] = cleanuppad within %[[noinfo_right_pad]]
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_right_child_pad]]) ]
+; CHECK-NEXT: unwind label %cleanup{{$}}
+
+cont:
+  invoke void @g()
+    to label %exit unwind label %implicit.root
+; CHECK: [[cont]]:
+; CHECK-NEXT: invoke void @g()
+; CHECK-NEXT: unwind label %[[implicit_root:.+]]
+
+implicit.root:
+  %implicit.root.pad = cleanuppad within none []
+  call void @g() [ "funclet"(token %implicit.root.pad) ]
+  invoke void @g() [ "funclet"(token %implicit.root.pad) ]
+    to label %implicit.root.cont unwind label %implicit.left
+; CHECK: [[implicit_root]]:
+; There's an unwind edge to %internal in implicit.right, and we need to propagate that
+; fact down to implicit.right.grandchild, up to implicit.root, and down to
+; implicit.left.child.catch, leaving all calls and "unwind to caller" catchswitches
+; alone so they don't conflict with the unwind edge in implicit.right
+; CHECK-NEXT: %[[implicit_root_pad:[^ ]+]] = cleanuppad within none
+; CHECK-NEXT: call void @g() [ "funclet"(token %[[implicit_root_pad]]) ]
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[implicit_root_pad]]) ]
+; CHECK-NEXT: to label %[[implicit_root_cont:[^ ]+]] unwind label %[[implicit_left:.+]]
+
+implicit.left:
+  %implicit.left.pad = cleanuppad within %implicit.root.pad []
+  invoke void @g() [ "funclet"(token %implicit.left.pad) ]
+    to label %unreachable unwind label %implicit.left.child
+; CHECK: [[implicit_left]]:
+; CHECK-NEXT: %[[implicit_left_pad:[^ ]+]] = cleanuppad within %[[implicit_root_pad:[^ ]+]]
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[implicit_left_pad]]) ]
+; CHECK-NEXT: unwind label %[[implicit_left_child:.+]]
+
+implicit.left.child:
+  %implicit.left.child.cs = catchswitch within %implicit.left.pad [label %implicit.left.child.catch] unwind to caller
+; CHECK: [[implicit_left_child]]:
+; CHECK-NEXT: %[[implicit_left_child_cs:[^ ]+]] = catchswitch within %[[implicit_left_pad]] [label %[[implicit_left_child_catch:[^ ]+]]] unwind to caller
+
+implicit.left.child.catch:
+  %implicit.left.child.pad = catchpad within %implicit.left.child.cs []
+  call void @g() [ "funclet"(token %implicit.left.child.pad) ]
+  br label %unreachable
+; CHECK: [[implicit_left_child_catch]]:
+; CHECK-NEXT: %[[implicit_left_child_pad:[^ ]+]] = catchpad within %[[implicit_left_child_cs]]
+; CHECK-NEXT: call void @g() [ "funclet"(token %[[implicit_left_child_pad]]) ]
+
+implicit.root.cont:
+  invoke void @g() [ "funclet"(token %implicit.root.pad) ]
+    to label %unreachable unwind label %implicit.right
+; CHECK: [[implicit_root_cont]]:
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[implicit_root_pad]]) ]
+; CHECK-NEXT: unwind label %[[implicit_right:.+]]
+
+implicit.right:
+  %implicit.right.cs = catchswitch within %implicit.root.pad [label %implicit.right.catch] unwind label %internal
+; CHECK: [[implicit_right]]:
+; This is the unwind edge (to %internal) whose existence needs to get propagated around the "implicit" tree
+; CHECK-NEXT: %[[implicit_right_cs:[^ ]+]] = catchswitch within %[[implicit_root_pad]] [label %[[implicit_right_catch:[^ ]+]]] unwind label %[[internal:.+]]
+
+implicit.right.catch:
+  %implicit.right.pad = catchpad
within %implicit.right.cs [] + invoke void @g() [ "funclet"(token %implicit.right.pad) ] + to label %unreachable unwind label %implicit.right.child +; CHECK: [[implicit_right_catch]]: +; CHECK-NEXT: %[[implicit_right_pad:[^ ]+]] = catchpad within %[[implicit_right_cs]] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[implicit_right_pad]]) ] +; CHECK-NEXT: unwind label %[[implicit_right_child:.+]] + +implicit.right.child: + %implicit.right.child.pad = cleanuppad within %implicit.right.pad [] + invoke void @g() [ "funclet"(token %implicit.right.child.pad) ] + to label %unreachable unwind label %implicit.right.grandchild +; CHECK: [[implicit_right_child]]: +; CHECK-NEXT: %[[implicit_right_child_pad:[^ ]+]] = cleanuppad within %[[implicit_right_pad]] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[implicit_right_child_pad]]) ] +; CHECK-NEXT: unwind label %[[implicit_right_grandchild:.+]] + +implicit.right.grandchild: + %implicit.right.grandchild.cs = catchswitch within %implicit.right.child.pad [label %implicit.right.grandchild.catch] unwind to caller +; CHECK: [[implicit_right_grandchild]]: +; CHECK-NEXT: %[[implicit_right_grandchild_cs:[^ ]+]] = catchswitch within %[[implicit_right_child_pad]] [label %[[implicit_right_grandchild_catch:[^ ]+]]] unwind to caller + +implicit.right.grandchild.catch: + %implicit.right.grandhcild.pad = catchpad within %implicit.right.grandchild.cs [] + call void @g() [ "funclet"(token %implicit.right.grandhcild.pad) ] + br label %unreachable +; CHECK: [[implicit_right_grandchild_catch]]: +; CHECK-NEXT: %[[implicit_right_grandhcild_pad:[^ ]+]] = catchpad within %[[implicit_right_grandchild_cs]] +; CHECK-NEXT: call void @g() [ "funclet"(token %[[implicit_right_grandhcild_pad]]) ] + +internal: + %internal.pad = cleanuppad within none [] + call void @g() [ "funclet"(token %internal.pad) ] + cleanupret from %internal.pad unwind to caller +; CHECK: [[internal]]: +; internal is a cleanup with a "return to caller" cleanuppad; that needs to get redirected +; to %cleanup in the caller, and the call needs to get similarly rewritten to an invoke. +; CHECK-NEXT: %[[internal_pad:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: invoke void @g() [ "funclet"(token %internal.pad.i) ] +; CHECK-NEXT: to label %[[next:[^ ]+]] unwind label %cleanup{{$}} +; CHECK: [[next]]: +; CHECK-NEXT: cleanupret from %[[internal_pad]] unwind label %cleanup{{$}} + +unreachable: + unreachable +exit: + ret void +} + + +declare void @ProcessCLRException() + +; Make sure the logic doesn't get tripped up when the inlined invoke is +; itself within a funclet in the caller. 
+; CHECK-LABEL: define void @test6(
+define void @test6() personality void ()* @ProcessCLRException {
+entry:
+  invoke void @g()
+    to label %exit unwind label %callsite_parent
+callsite_parent:
+  %callsite_parent.pad = cleanuppad within none []
+; CHECK: %callsite_parent.pad = cleanuppad within none
+  invoke void @test6_inlinee() [ "funclet"(token %callsite_parent.pad) ]
+    to label %ret unwind label %cleanup
+ret:
+  cleanupret from %callsite_parent.pad unwind label %cleanup
+cleanup:
+  %pad = cleanuppad within none []
+  call void @g() [ "funclet"(token %pad) ]
+  cleanupret from %pad unwind to caller
+exit:
+  ret void
+}
+
+define void @test6_inlinee() alwaysinline personality void ()* @ProcessCLRException {
+entry:
+  invoke void @g()
+    to label %exit unwind label %inlinee_cleanup
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %callsite_parent.pad) ]
+; CHECK-NEXT:   unwind label %[[inlinee_cleanup:.+]]
+
+inlinee_cleanup:
+  %inlinee.pad = cleanuppad within none []
+  call void @g() [ "funclet"(token %inlinee.pad) ]
+  unreachable
+; CHECK: [[inlinee_cleanup]]:
+; CHECK-NEXT: %[[inlinee_pad:[^ ]+]] = cleanuppad within %callsite_parent.pad
+; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[inlinee_pad]]) ]
+; CHECK-NEXT:   unwind label %cleanup{{$}}
+
+exit:
+  ret void
+}
diff --git a/test/Transforms/InstCombine/bitreverse-hang.ll b/test/Transforms/InstCombine/bitreverse-hang.ll
new file mode 100644
index 000000000000..6823bd0ed653
--- /dev/null
+++ b/test/Transforms/InstCombine/bitreverse-hang.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -loop-unroll -instcombine -S | FileCheck %s
+
+; This test is a worst-case scenario for bitreversal/byteswap detection.
+; After loop unrolling (the unrolled loop is unreadably large so it has been kept
+; rolled here), we have a binary tree of OR operands (as bitreversal detection
+; looks straight through shifts):
+;
+;  OR
+;  | \
+;  |  LSHR
+;  | /
+;  OR
+;  | \
+;  |  LSHR
+;  | /
+;  OR
+;
+; This results in exponential runtime. The loop here is 32 iterations which will
+; totally hang if we don't deal with this case cleverly.
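+; As a sketch of why this blows up (the value names below are hypothetical,
+; showing the first two unrolled iterations of the loop in @fn1):
+;   %shr.0 = lshr i32 %b.promoted, 1
+;   %or.0  = or i32 %shr.0, %b.promoted
+;   %shr.1 = lshr i32 %or.0, 1
+;   %or.1  = or i32 %shr.1, %or.0
+; A matcher that recurses into both operands of every OR, looking through the
+; LSHR, revisits each lower level twice, so 32 iterations would take on the
+; order of 2^32 steps unless already-visited nodes are cached.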
+
+@b = common global i32 0, align 4
+
+; CHECK: define i32 @fn1
+define i32 @fn1() #0 {
+entry:
+  %b.promoted = load i32, i32* @b, align 4, !tbaa !2
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %or4 = phi i32 [ %b.promoted, %entry ], [ %or, %for.body ]
+  %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %shr = lshr i32 %or4, 1
+  %or = or i32 %shr, %or4
+  %inc = add nuw nsw i32 %i.03, 1
+  %exitcond = icmp eq i32 %inc, 32
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  store i32 %or, i32* @b, align 4, !tbaa !2
+  ret i32 undef
+}
+
+attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git eb70f4e9cc9a4dc3dd57b032fb858d56b4b64a0e)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
diff --git a/test/Transforms/InstCombine/bitreverse-recognize.ll b/test/Transforms/InstCombine/bitreverse-recognize.ll
deleted file mode 100644
index fbd5cb6d139c..000000000000
--- a/test/Transforms/InstCombine/bitreverse-recognize.ll
+++ /dev/null
@@ -1,114 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.10.0"
-
-define zeroext i8 @f_u8(i8 zeroext %a) {
-; CHECK-LABEL: @f_u8
-; CHECK-NEXT: %[[A:.*]] = call i8 @llvm.bitreverse.i8(i8 %a)
-; CHECK-NEXT: ret i8 %[[A]]
-  %1 = shl i8 %a, 7
-  %2 = shl i8 %a, 5
-  %3 = and i8 %2, 64
-  %4 = shl i8 %a, 3
-  %5 = and i8 %4, 32
-  %6 = shl i8 %a, 1
-  %7 = and i8 %6, 16
-  %8 = lshr i8 %a, 1
-  %9 = and i8 %8, 8
-  %10 = lshr i8 %a, 3
-  %11 = and i8 %10, 4
-  %12 = lshr i8 %a, 5
-  %13 = and i8 %12, 2
-  %14 = lshr i8 %a, 7
-  %15 = or i8 %14, %1
-  %16 = or i8 %15, %3
-  %17 = or i8 %16, %5
-  %18 = or i8 %17, %7
-  %19 = or i8 %18, %9
-  %20 = or i8 %19, %11
-  %21 = or i8 %20, %13
-  ret i8 %21
-}
-
-; The ANDs with 32 and 64 have been swapped here, so the sequence does not
-; completely match a bitreverse.
-define zeroext i8 @f_u8_fail(i8 zeroext %a) {
-; CHECK-LABEL: @f_u8_fail
-; CHECK-NOT: call
-; CHECK: ret i8
-  %1 = shl i8 %a, 7
-  %2 = shl i8 %a, 5
-  %3 = and i8 %2, 32
-  %4 = shl i8 %a, 3
-  %5 = and i8 %4, 64
-  %6 = shl i8 %a, 1
-  %7 = and i8 %6, 16
-  %8 = lshr i8 %a, 1
-  %9 = and i8 %8, 8
-  %10 = lshr i8 %a, 3
-  %11 = and i8 %10, 4
-  %12 = lshr i8 %a, 5
-  %13 = and i8 %12, 2
-  %14 = lshr i8 %a, 7
-  %15 = or i8 %14, %1
-  %16 = or i8 %15, %3
-  %17 = or i8 %16, %5
-  %18 = or i8 %17, %7
-  %19 = or i8 %18, %9
-  %20 = or i8 %19, %11
-  %21 = or i8 %20, %13
-  ret i8 %21
-}
-
-define zeroext i16 @f_u16(i16 zeroext %a) {
-; CHECK-LABEL: @f_u16
-; CHECK-NEXT: %[[A:.*]] = call i16 @llvm.bitreverse.i16(i16 %a)
-; CHECK-NEXT: ret i16 %[[A]]
-  %1 = shl i16 %a, 15
-  %2 = shl i16 %a, 13
-  %3 = and i16 %2, 16384
-  %4 = shl i16 %a, 11
-  %5 = and i16 %4, 8192
-  %6 = shl i16 %a, 9
-  %7 = and i16 %6, 4096
-  %8 = shl i16 %a, 7
-  %9 = and i16 %8, 2048
-  %10 = shl i16 %a, 5
-  %11 = and i16 %10, 1024
-  %12 = shl i16 %a, 3
-  %13 = and i16 %12, 512
-  %14 = shl i16 %a, 1
-  %15 = and i16 %14, 256
-  %16 = lshr i16 %a, 1
-  %17 = and i16 %16, 128
-  %18 = lshr i16 %a, 3
-  %19 = and i16 %18, 64
-  %20 = lshr i16 %a, 5
-  %21 = and i16 %20, 32
-  %22 = lshr i16 %a, 7
-  %23 = and i16 %22, 16
-  %24 = lshr i16 %a, 9
-  %25 = and i16 %24, 8
-  %26 = lshr i16 %a, 11
-  %27 = and i16 %26, 4
-  %28 = lshr i16 %a, 13
-  %29 = and i16 %28, 2
-  %30 = lshr i16 %a, 15
-  %31 = or i16 %30, %1
-  %32 = or i16 %31, %3
-  %33 = or i16 %32, %5
-  %34 = or i16 %33, %7
-  %35 = or i16 %34, %9
-  %36 = or i16 %35, %11
-  %37 = or i16 %36, %13
-  %38 = or i16 %37, %15
-  %39 = or i16 %38, %17
-  %40 = or i16 %39, %19
-  %41 = or i16 %40, %21
-  %42 = or i16 %41, %23
-  %43 = or i16 %42, %25
-  %44 = or i16 %43, %27
-  %45 = or i16 %44, %29
-  ret i16 %45
-}
\ No newline at end of file
diff --git a/test/Transforms/InstCombine/cos-2.ll b/test/Transforms/InstCombine/cos-2.ll
index c9a9c7c07712..a85cc8fa6bde 100644
--- a/test/Transforms/InstCombine/cos-2.ll
+++ b/test/Transforms/InstCombine/cos-2.ll
@@ -1,12 +1,11 @@
-; Test that the cos library call simplifier works correctly.
-;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
 declare float @cos(double)
+declare signext i8 @sqrt(...)
 
-; Check that cos functions with the wrong prototype aren't simplified.
+; Check that functions with the wrong prototype aren't simplified.
 
 define float @test_no_simplify1(double %d) {
 ; CHECK-LABEL: @test_no_simplify1(
@@ -15,3 +14,14 @@ define float @test_no_simplify1(double %d) {
 ; CHECK: call float @cos(double %neg)
   ret float %cos
 }
+
+
+define i8 @bogus_sqrt() {
+  %fake_sqrt = call signext i8 (...) @sqrt()
+  ret i8 %fake_sqrt
+
+; CHECK-LABEL: bogus_sqrt(
+; CHECK-NEXT:  %fake_sqrt = call signext i8 (...) @sqrt()
+; CHECK-NEXT:  ret i8 %fake_sqrt
+}
+
diff --git a/test/Transforms/InstCombine/double-float-shrink-1.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll
index 319ea3259830..74f3ebbf5230 100644
--- a/test/Transforms/InstCombine/double-float-shrink-1.ll
+++ b/test/Transforms/InstCombine/double-float-shrink-1.ll
@@ -364,6 +364,26 @@ define float @max1(float %a, float %b) {
 ; CHECK-NEXT: ret
 }
 
+; A function can have a name that matches a common libcall,
+; but with the wrong type(s). Let it be.
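+; For reference, the libm prototype the simplifier would need to see here is
+;   declare double @fmin(double, double)
+; (a known C99 signature, not part of this test); the fp128 variant declared
+; below deliberately mismatches it, so the call must survive instcombine
+; untouched, as the CHECK lines verify.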
+
+define float @fake_fmin(float %a, float %b) {
+  %c = fpext float %a to fp128
+  %d = fpext float %b to fp128
+  %e = call fp128 @fmin(fp128 %c, fp128 %d)
+  %f = fptrunc fp128 %e to float
+  ret float %f
+
+; CHECK-LABEL: fake_fmin(
+; CHECK-NEXT:  %c = fpext float %a to fp128
+; CHECK-NEXT:  %d = fpext float %b to fp128
+; CHECK-NEXT:  %e = call fp128 @fmin(fp128 %c, fp128 %d)
+; CHECK-NEXT:  %f = fptrunc fp128 %e to float
+; CHECK-NEXT:  ret float %f
+}
+
+declare fp128 @fmin(fp128, fp128) ; This is not the 'fmin' you're looking for.
+
 declare double @fmax(double, double)
 
 declare double @tanh(double) #1
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index 67e7cbd7686a..a76ec11fb1da 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -16,6 +16,7 @@
 #include "OrcLazyJIT.h"
 #include "RemoteJITUtils.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/CodeGen/LinkAllCodegenComponents.h"
@@ -741,11 +742,11 @@ std::unique_ptr launchRemote() {
     ChildPath.reset(new char[ChildExecPath.size() + 1]);
     std::copy(ChildExecPath.begin(), ChildExecPath.end(), &ChildPath[0]);
     ChildPath[ChildExecPath.size()] = '\0';
-    std::string ChildInStr = std::to_string(PipeFD[0][0]);
+    std::string ChildInStr = utostr(PipeFD[0][0]);
     ChildIn.reset(new char[ChildInStr.size() + 1]);
     std::copy(ChildInStr.begin(), ChildInStr.end(), &ChildIn[0]);
     ChildIn[ChildInStr.size()] = '\0';
-    std::string ChildOutStr = std::to_string(PipeFD[1][1]);
+    std::string ChildOutStr = utostr(PipeFD[1][1]);
     ChildOut.reset(new char[ChildOutStr.size() + 1]);
     std::copy(ChildOutStr.begin(), ChildOutStr.end(), &ChildOut[0]);
     ChildOut[ChildOutStr.size()] = '\0';
diff --git a/utils/release/test-release.sh b/utils/release/test-release.sh
index fb50160f5fed..c3884ba31756 100755
--- a/utils/release/test-release.sh
+++ b/utils/release/test-release.sh
@@ -65,11 +65,6 @@ function usage() {
     echo " -no-openmp           Disable check-out & build libomp"
 }
 
-if [ `uname -s` = "Darwin" ]; then
-  # compiler-rt doesn't yet build with CMake on Darwin.
-  use_autoconf="yes"
-fi
-
 while [ $# -gt 0 ]; do
     case $1 in
         -release | --release )
@@ -288,10 +283,20 @@ function export_sources() {
     if [ ! -h clang ]; then
         ln -s ../../cfe.src clang
     fi
-    cd $BuildDir/llvm.src/tools/clang/tools
-    if [ ! -h extra ]; then
-        ln -s ../../../../clang-tools-extra.src extra
+
+    # The autoconf and CMake builds want different symlinks here:
+    if [ "$use_autoconf" = "yes" ]; then
+        cd $BuildDir/llvm.src/tools/clang/tools
+        if [ ! -h extra ]; then
+            ln -s ../../../../clang-tools-extra.src extra
+        fi
+    else
+        cd $BuildDir/cfe.src/tools
+        if [ ! -h extra ]; then
+            ln -s ../../clang-tools-extra.src extra
+        fi
     fi
+
     cd $BuildDir/llvm.src/projects
     if [ -d $BuildDir/test-suite.src ] && [ ! -h test-suite ]; then
         ln -s ../../test-suite.src test-suite

From 790462cc37eb4ee9424408ca56a74bca9b3ef693 Mon Sep 17 00:00:00 2001
From: Dimitry Andric
Date: Fri, 22 Jan 2016 21:17:12 +0000
Subject: [PATCH 2/2] Vendor import of clang release_38 branch r258549:
 https://llvm.org/svn/llvm-project/cfe/branches/release_38@258549
---
 lib/CodeGen/CGOpenMPRuntime.cpp            | 16 +++++++------
 lib/CodeGen/CGStmtOpenMP.cpp               | 20 ++++++++--------
 lib/CodeGen/CodeGenFunction.h              |  2 ++
 lib/Sema/SemaTemplate.cpp                  |  1 -
 test/OpenMP/parallel_reduction_codegen.cpp |  4 ++++
 test/SemaTemplate/default-arguments.cpp    | 28 ++++++++++++++++++++++
 6 files changed, 53 insertions(+), 18 deletions(-)

diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index 3b97ba2469ae..015a7396ffbe 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -3548,14 +3548,16 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
           E = CGF.EmitAnyExpr(EExpr);
         CGF.EmitOMPAtomicSimpleUpdateExpr(
             X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
-            [&CGF, UpExpr, VD, IPriv](RValue XRValue) {
+            [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
              CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
-              PrivateScope.addPrivate(VD, [&CGF, VD, XRValue]() -> Address {
-                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
-                CGF.EmitStoreThroughLValue(
-                    XRValue, CGF.MakeAddrLValue(LHSTemp, VD->getType()));
-                return LHSTemp;
-              });
+              PrivateScope.addPrivate(
+                  VD, [&CGF, VD, XRValue, Loc]() -> Address {
+                    Address LHSTemp = CGF.CreateMemTemp(VD->getType());
+                    CGF.emitOMPSimpleStore(
+                        CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
+                        VD->getType().getNonReferenceType(), Loc);
+                    return LHSTemp;
+                  });
              (void)PrivateScope.Privatize();
              return CGF.EmitAnyExpr(UpExpr);
            });
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index 14917c20c535..68555128ea01 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2163,17 +2163,17 @@ static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
   }
 }
 
-static void emitSimpleStore(CodeGenFunction &CGF, LValue LVal, RValue RVal,
-                            QualType RValTy, SourceLocation Loc) {
-  switch (CGF.getEvaluationKind(LVal.getType())) {
+void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
+                                         QualType RValTy, SourceLocation Loc) {
+  switch (getEvaluationKind(LVal.getType())) {
   case TEK_Scalar:
-    CGF.EmitStoreThroughLValue(RValue::get(convertToScalarValue(
-                                   CGF, RVal, RValTy, LVal.getType(), Loc)),
-                               LVal);
+    EmitStoreThroughLValue(RValue::get(convertToScalarValue(
+                               *this, RVal, RValTy, LVal.getType(), Loc)),
+                           LVal);
     break;
   case TEK_Complex:
-    CGF.EmitStoreOfComplex(
-        convertToComplexValue(CGF, RVal, RValTy, LVal.getType(), Loc), LVal,
+    EmitStoreOfComplex(
+        convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
         /*isInit=*/false);
     break;
   case TEK_Aggregate:
@@ -2201,7 +2201,7 @@ static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
   // list.
   if (IsSeqCst)
     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
-  emitSimpleStore(CGF, VLValue, Res, X->getType().getNonReferenceType(), Loc);
+  CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
 }
 
 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
@@ -2459,7 +2459,7 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
     }
   }
   // Emit post-update store to 'v' of old/new 'x' value.
-  emitSimpleStore(CGF, VLValue, NewVVal, NewVValType, Loc);
+  CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
   // OpenMP, 2.12.6, atomic Construct
   // Any atomic construct with a seq_cst clause forces the atomically
   // performed operation to include an implicit flush operation without a
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index b3d50352532e..4803b13f58d9 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -2211,6 +2211,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S);
   void GenerateOpenMPCapturedVars(const CapturedStmt &S,
                                   SmallVectorImpl &CapturedVars);
+  void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy,
+                          SourceLocation Loc);
   /// \brief Perform element by element copying of arrays with type \a
   /// OriginalType from \a SrcAddr to \a DestAddr using copying procedure
   /// generated by \a CopyGen.
diff --git a/lib/Sema/SemaTemplate.cpp b/lib/Sema/SemaTemplate.cpp
index 57156078c80b..138cee0b9424 100644
--- a/lib/Sema/SemaTemplate.cpp
+++ b/lib/Sema/SemaTemplate.cpp
@@ -3281,7 +3281,6 @@ SubstDefaultTemplateArgument(Sema &SemaRef,
   for (unsigned i = 0, e = Param->getDepth(); i != e; ++i)
     TemplateArgLists.addOuterTemplateArguments(None);
 
-  Sema::ContextRAII SavedContext(SemaRef, Template->getDeclContext());
   EnterExpressionEvaluationContext ConstantEvaluated(SemaRef,
                                                      Sema::ConstantEvaluated);
   return SemaRef.SubstExpr(Param->getDefaultArgument(), TemplateArgLists);
diff --git a/test/OpenMP/parallel_reduction_codegen.cpp b/test/OpenMP/parallel_reduction_codegen.cpp
index b9744b634118..05224d0a1391 100644
--- a/test/OpenMP/parallel_reduction_codegen.cpp
+++ b/test/OpenMP/parallel_reduction_codegen.cpp
@@ -158,6 +158,7 @@ int main() {
   int vec[] = {1, 2};
   S s_arr[] = {1, 2};
   S var(3), var1;
+  float _Complex cf;
 #pragma omp parallel reduction(+:t_var) reduction(&:var) reduction(&& : var1) reduction(min: t_var1)
   {
     vec[0] = t_var;
@@ -169,6 +170,8 @@
     vec[0] = t_var;
     s_arr[0] = var;
   }
+#pragma omp parallel reduction(+ : cf)
+  ;
   return tmain();
 #endif
 }
@@ -178,6 +181,7 @@
 // CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
 // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i32]*, float*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]*, float*)* [[MAIN_MICROTASK:@.+]] to void
 // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i32]*, float*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]*, float*)* [[MAIN_MICROTASK1:@.+]] to void
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, { float, float }*)* [[MAIN_MICROTASK2:@.+]] to void
 // CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()
 // CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]*
 // CHECK: ret
diff --git a/test/SemaTemplate/default-arguments.cpp b/test/SemaTemplate/default-arguments.cpp
index 740a5a9d07b3..d3e249db7ee2 100644
--- a/test/SemaTemplate/default-arguments.cpp
+++ b/test/SemaTemplate/default-arguments.cpp
@@ -179,3 +179,31 @@ struct C {
   C(T t = ); // expected-error {{expected expression}}
 };
 C obj;
+
+namespace PR26134 {
+// Make sure when substituting default template arguments we do it in the current context.
+template
+struct X {};
+
+template struct Y {
+  void f() { X xy; }
+  static const bool value = B;
+};
+
+namespace ns1 {
+template
+struct X {
+  template struct XInner { static const bool value = B; };
+};
+template struct S { static const bool value = B; };
+#if __cplusplus > 199711L
+template struct Y {
+  static constexpr bool f() { return typename X>::template XInner<>{}.value; }
+  static_assert(f() == B, "");
+};
+Y y;
+Y y2;
+#endif
+
+} // end ns1
+} // end ns PR26134