Vendor import of llvm release_38 branch r261369:

https://llvm.org/svn/llvm-project/llvm/branches/release_38@261369
Dimitry Andric 2016-02-21 13:51:43 +00:00
parent a322a4af1f
commit 3f4bde29a3
34 changed files with 651 additions and 298 deletions


@ -197,12 +197,6 @@ CMake manual, or execute ``cmake --help-variable VARIABLE_NAME``.
**CMAKE_CXX_FLAGS**:STRING
Extra flags to use when compiling C++ source files.
**BUILD_SHARED_LIBS**:BOOL
Flag indicating if shared libraries will be built. Its default value is
OFF. This option is only recommended for use by LLVM developers.
On Windows, shared libraries may be used when building with MinGW, including
mingw-w64, but not when building with the Microsoft toolchain.
.. _LLVM-specific variables:
LLVM-specific variables
@ -445,6 +439,30 @@ LLVM-specific variables
$CMAKE_INSTALL_PREFIX/Toolchains containing an xctoolchain directory which can
be used to override the default system tools.
**LLVM_BUILD_LLVM_DYLIB**:BOOL
If enabled, the target for building the libLLVM shared library is added.
This library contains all of LLVM's components in a single shared library.
Defaults to OFF. This cannot be used in conjunction with BUILD_SHARED_LIBS.
Tools will only be linked to the libLLVM shared library if LLVM_LINK_LLVM_DYLIB
is also ON.
The components in the library can be customised by setting LLVM_DYLIB_COMPONENTS
to a list of the desired components.
**LLVM_LINK_LLVM_DYLIB**:BOOL
If enabled, tools will be linked with the libLLVM shared library. Defaults
to OFF. Setting LLVM_LINK_LLVM_DYLIB to ON also sets LLVM_BUILD_LLVM_DYLIB
to ON.
**BUILD_SHARED_LIBS**:BOOL
Flag indicating if each LLVM component (e.g. Support) is built as a shared
library (ON) or as a static library (OFF). Its default value is OFF. On
Windows, shared libraries may be used when building with MinGW, including
mingw-w64, but not when building with the Microsoft toolchain.
.. note:: BUILD_SHARED_LIBS is only recommended for use by LLVM developers.
If you want to build LLVM as a shared library, you should use the
``LLVM_BUILD_LLVM_DYLIB`` option.
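As an illustrative example for the two dylib options above (an assumed
invocation, not part of this patch), a configuration that links the tools
against a single libLLVM shared library would be:

  cmake -DLLVM_BUILD_LLVM_DYLIB=ON -DLLVM_LINK_LLVM_DYLIB=ON ../llvm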
Executing the test suite
========================


@ -89,6 +89,30 @@ Non-comprehensive list of changes in this release
the node ``N`` is guaranteed not to be the last in the list, it is safe to
call ``&*++N->getIterator()`` directly.
* The `Kaleidoscope tutorials <tutorial/index.html>`_ have been updated to use
the ORC JIT APIs.
* ORC now has a basic set of C bindings.
* Optional support for linking clang and the LLVM tools with a single libLLVM
shared library. To enable this, pass ``-DLLVM_LINK_LLVM_DYLIB=ON`` to CMake.
See `Building LLVM with CMake`_ for more details.
* The optimization to move the prologue and epilogue of functions in colder
code path (shrink-wrapping) is now enabled by default.
* A new target-independent gcc-compatible emulated Thread Local Storage mode
has been added. When the ``-femulated-tls`` flag is used, all accesses to TLS
variables are converted to calls to ``__emutls_get_address`` in the runtime
library (see the sketch after this list).
* MSVC compatible exception handling has been completely overhauled. New
instructions have been introduced to facilitate this:
`New exception handling instructions <ExceptionHandling.html#new-exception-handling-instructions>`_.
While we have done our best to test this feature thoroughly, it would
not be completely surprising if there were a few lingering issues that
early adopters might bump into.
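Below is a minimal C++ sketch of the emulated-TLS lowering (illustrative
only; the control variable the compiler really emits is named
``__emutls_v.<name>``, which cannot be spelled in source, so a hypothetical
stand-in is used):

// Ordinary TLS access:
__thread int counter;
int next() { return ++counter; }

// Conceptual form under -femulated-tls (hand-written approximation;
// __emutls_v_counter is a hypothetical stand-in for __emutls_v.counter):
extern "C" void *__emutls_get_address(void *control);
extern void *__emutls_v_counter;
int next_emulated() {
  return ++*static_cast<int *>(__emutls_get_address(&__emutls_v_counter));
}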
.. NOTE
For small 1-3 sentence descriptions, just add an entry at the end of
this list. If your description won't fit comfortably in one bullet
@ -115,7 +139,7 @@ Changes to the ARM Backends
During this release the AArch64 target has:
* Added support for more sanitizers (MSAN, TSAN) and made them compatible with
all VMA kernel configurations (kurrently tested on 39 and 42 bits).
all VMA kernel configurations (currently tested on 39 and 42 bits).
* Gained initial LLD support in the new ELF back-end
* Extended the Load/Store optimiser and cleaned up some of the bad decisions
made earlier.
@ -218,9 +242,16 @@ Changes to the X86 Target
* More efficient code for wide integer compares. (E.g. 64-bit compares
on 32-bit targets.)
* Tail call support for ``thiscall``, ``stdcall`, ``vectorcall``, and
* Tail call support for ``thiscall``, ``stdcall``, ``vectorcall``, and
``fastcall`` functions.
Changes to the Hexagon Target
-----------------------------
In addition to general code size and performance improvements, the Hexagon
target now has basic support for the Hexagon V60 architecture and Hexagon
Vector Extensions (HVX).
Changes to the AVR Target
-------------------------


@ -544,6 +544,11 @@ namespace llvm {
return true;
}
// Returns true if any segment in the live range contains any of the
// provided slot indexes. Slots which occur in holes between
// segments will not cause the function to return true.
bool isLiveAtIndexes(ArrayRef<SlotIndex> Slots) const;
bool operator<(const LiveRange& other) const {
const SlotIndex &thisIndex = beginIndex();
const SlotIndex &otherIndex = other.beginIndex();


@ -1539,16 +1539,7 @@ public:
}
CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args = None,
ArrayRef<OperandBundleDef> OpBundles = None,
const Twine &Name = "", MDNode *FPMathTag = nullptr) {
CallInst *CI = CallInst::Create(Callee, Args, OpBundles);
if (isa<FPMathOperator>(CI))
CI = cast<CallInst>(AddFPMathAttributes(CI, FPMathTag, FMF));
return Insert(CI, Name);
}
CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args,
const Twine &Name, MDNode *FPMathTag = nullptr) {
PointerType *PTy = cast<PointerType>(Callee->getType());
FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
return CreateCall(FTy, Callee, Args, Name, FPMathTag);
@ -1563,6 +1554,15 @@ public:
return Insert(CI, Name);
}
CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> OpBundles,
const Twine &Name = "", MDNode *FPMathTag = nullptr) {
CallInst *CI = CallInst::Create(Callee, Args, OpBundles);
if (isa<FPMathOperator>(CI))
CI = cast<CallInst>(AddFPMathAttributes(CI, FPMathTag, FMF));
return Insert(CI, Name);
}
CallInst *CreateCall(Function *Callee, ArrayRef<Value *> Args,
const Twine &Name = "", MDNode *FPMathTag = nullptr) {
return CreateCall(Callee->getFunctionType(), Callee, Args, Name, FPMathTag);
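A short usage sketch of the relocated operand-bundle overload (assumed
context: ``II`` is an InvokeInst and ``Builder`` an IRBuilder in scope; it
mirrors what the PruneEH change later in this commit does with
CallInst::Create):

// Turn an invoke into a plain call, carrying its operand bundles along.
SmallVector<Value *, 8> Args(II->arg_begin(), II->arg_end());
SmallVector<OperandBundleDef, 1> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
CallInst *Call = Builder.CreateCall(II->getCalledValue(), Args, OpBundles);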


@ -2512,6 +2512,14 @@ public:
return block_begin() + getNumOperands();
}
iterator_range<block_iterator> blocks() {
return make_range(block_begin(), block_end());
}
iterator_range<const_block_iterator> blocks() const {
return make_range(block_begin(), block_end());
}
op_range incoming_values() { return operands(); }
const_op_range incoming_values() const { return operands(); }
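A one-line usage sketch for the new accessor (the LoopStrengthReduce change
later in this commit iterates a PHI's predecessors exactly this way):

for (BasicBlock *Pred : PN->blocks()) // PN is some PHINode *
  if (isa<CatchSwitchInst>(Pred->getFirstNonPHI()))
    return;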


@ -213,8 +213,11 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
if (!Spillable)
return;
// Mark li as unspillable if all live ranges are tiny.
if (li.isZeroLength(LIS.getSlotIndexes())) {
// Mark li as unspillable if all live ranges are tiny and the interval
// is not live at any reg mask. If the interval is live at a reg mask,
// spilling may be required.
if (li.isZeroLength(LIS.getSlotIndexes()) &&
!li.isLiveAtIndexes(LIS.getRegMaskSlots())) {
li.markNotSpillable();
return;
}


@ -748,6 +748,40 @@ void LiveRange::flushSegmentSet() {
verify();
}
bool LiveRange::isLiveAtIndexes(ArrayRef<SlotIndex> Slots) const {
ArrayRef<SlotIndex>::iterator SlotI = Slots.begin();
ArrayRef<SlotIndex>::iterator SlotE = Slots.end();
// If there are no regmask slots, we have nothing to search.
if (SlotI == SlotE)
return false;
// Start our search at the first segment that ends after the first slot.
const_iterator SegmentI = find(*SlotI);
const_iterator SegmentE = end();
// If there are no segments that end after the first slot, we're done.
if (SegmentI == SegmentE)
return false;
// Look for each slot in the live range.
for ( ; SlotI != SlotE; ++SlotI) {
// Go to the next segment that ends after the current slot.
// The slot may be within a hole in the range.
SegmentI = advanceTo(SegmentI, *SlotI);
if (SegmentI == SegmentE)
return false;
// If this segment contains the slot, we're done.
if (SegmentI->contains(*SlotI))
return true;
// Otherwise, look for the next slot.
}
// We didn't find a segment containing any of the slots.
return false;
}
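A worked trace (illustrative values): for a range with segments [4,10) and
[20,30) queried with slots {12, 25}, find(12) lands on [20,30); that segment
does not contain 12 (the slot sits in the hole between segments), so the loop
advances to slot 25, which [20,30) does contain, and the function returns
true.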
void LiveInterval::freeSubRange(SubRange *S) {
S->~SubRange();
// Memory was allocated with BumpPtr allocator and is not freed here.


@ -1637,6 +1637,7 @@ struct FloatSignAsInt {
MachinePointerInfo FloatPointerInfo;
SDValue IntValue;
APInt SignMask;
uint8_t SignBit;
};
}
@ -1653,6 +1654,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
if (TLI.isTypeLegal(IVT)) {
State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value);
State.SignMask = APInt::getSignBit(NumBits);
State.SignBit = NumBits - 1;
return;
}
@ -1689,6 +1691,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
IntPtr, State.IntPointerInfo, MVT::i8,
false, false, false, 0);
State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7);
State.SignBit = 7;
}
/// Replace the integer value produced by getSignAsIntValue() with a new value
@ -1731,15 +1734,38 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
return DAG.getSelect(DL, FloatVT, Cond, NegValue, AbsValue);
}
// Transform values to integer, copy the sign bit and transform back.
// Transform Mag value to integer, and clear the sign bit.
FloatSignAsInt MagAsInt;
getSignAsIntValue(MagAsInt, DL, Mag);
assert(SignAsInt.SignMask == MagAsInt.SignMask);
SDValue ClearSignMask = DAG.getConstant(~SignAsInt.SignMask, DL, IntVT);
SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, MagAsInt.IntValue,
EVT MagVT = MagAsInt.IntValue.getValueType();
SDValue ClearSignMask = DAG.getConstant(~MagAsInt.SignMask, DL, MagVT);
SDValue ClearedSign = DAG.getNode(ISD::AND, DL, MagVT, MagAsInt.IntValue,
ClearSignMask);
SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit);
// Get the signbit at the right position for MagAsInt.
int ShiftAmount = SignAsInt.SignBit - MagAsInt.SignBit;
if (SignBit.getValueSizeInBits() > ClearedSign.getValueSizeInBits()) {
if (ShiftAmount > 0) {
SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, IntVT);
SignBit = DAG.getNode(ISD::SRL, DL, IntVT, SignBit, ShiftCnst);
} else if (ShiftAmount < 0) {
SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, IntVT);
SignBit = DAG.getNode(ISD::SHL, DL, IntVT, SignBit, ShiftCnst);
}
SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit);
} else if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) {
SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit);
if (ShiftAmount > 0) {
SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, MagVT);
SignBit = DAG.getNode(ISD::SRL, DL, MagVT, SignBit, ShiftCnst);
} else if (ShiftAmount < 0) {
SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, MagVT);
SignBit = DAG.getNode(ISD::SHL, DL, MagVT, SignBit, ShiftCnst);
}
}
// Store the part with the modified sign and convert back to float.
SDValue CopiedSign = DAG.getNode(ISD::OR, DL, MagVT, ClearedSign, SignBit);
return modifySignAsInt(MagAsInt, DL, CopiedSign);
}
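A worked instance of the new shift logic (illustrative numbers, not from the
patch): for copysign(f64 Mag, f32 Sign) with both operands bitcast to legal
integers, MagAsInt.SignBit = 63 and SignAsInt.SignBit = 31, so ShiftAmount =
31 - 63 = -32. The sign word is first zero-extended from i32 to i64 (MagVT)
and then shifted left by 32, moving bit 31 to bit 63 before it is ORed into
the cleared magnitude.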


@ -257,10 +257,14 @@ static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo,
if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI))
if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest())
calculateCXXStateNumbers(FuncInfo, UserI, CatchLow);
if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI))
if (getCleanupRetUnwindDest(InnerCleanupPad) ==
CatchSwitch->getUnwindDest())
if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) {
BasicBlock *UnwindDest = getCleanupRetUnwindDest(InnerCleanupPad);
// If a nested cleanup pad reports a null unwind destination and the
// enclosing catch pad doesn't, it must be post-dominated by an
// unreachable instruction.
if (!UnwindDest || UnwindDest == CatchSwitch->getUnwindDest())
calculateCXXStateNumbers(FuncInfo, UserI, CatchLow);
}
}
}
int CatchHigh = FuncInfo.getLastStateNumber();
@ -360,10 +364,14 @@ static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo,
if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI))
if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest())
calculateSEHStateNumbers(FuncInfo, UserI, ParentState);
if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI))
if (getCleanupRetUnwindDest(InnerCleanupPad) ==
CatchSwitch->getUnwindDest())
if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) {
BasicBlock *UnwindDest = getCleanupRetUnwindDest(InnerCleanupPad);
// If a nested cleanup pad reports a null unwind destination and the
// enclosing catch pad doesn't, it must be post-dominated by an
// unreachable instruction.
if (!UnwindDest || UnwindDest == CatchSwitch->getUnwindDest())
calculateSEHStateNumbers(FuncInfo, UserI, ParentState);
}
}
} else {
auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI);


@ -1,8 +1,17 @@
include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/.. )
if( HAVE_LIBDL )
set(LLVM_INTEL_JIT_LIBS ${CMAKE_DL_LIBS})
endif()
if( HAVE_LIBPTHREAD )
set(LLVM_INTEL_JIT_LIBS pthread ${LLVM_INTEL_JIT_LIBS})
endif()
add_llvm_library(LLVMIntelJITEvents
IntelJITEventListener.cpp
jitprofiling.c
LINK_LIBS pthread ${CMAKE_DL_LIBS}
LINK_LIBS ${LLVM_INTEL_JIT_LIBS}
)
add_dependencies(LLVMIntelJITEvents LLVMCodeGen)


@ -21,4 +21,4 @@
type = OptionalLibrary
name = IntelJITEvents
parent = ExecutionEngine
required_libraries = Core DebugInfoDWARF Support Object ExecutionEngine
required_libraries = CodeGen Core DebugInfoDWARF Support Object ExecutionEngine


@ -338,9 +338,9 @@ static Triple::ArchType parseArch(StringRef ArchName) {
// FIXME: Do we need to support these?
.Cases("i786", "i886", "i986", Triple::x86)
.Cases("amd64", "x86_64", "x86_64h", Triple::x86_64)
.Case("powerpc", Triple::ppc)
.Cases("powerpc64", "ppu", Triple::ppc64)
.Case("powerpc64le", Triple::ppc64le)
.Cases("powerpc", "ppc32", Triple::ppc)
.Cases("powerpc64", "ppu", "ppc64", Triple::ppc64)
.Cases("powerpc64le", "ppc64le", Triple::ppc64le)
.Case("xscale", Triple::arm)
.Case("xscaleeb", Triple::armeb)
.Case("aarch64", Triple::aarch64)
@ -359,7 +359,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("r600", Triple::r600)
.Case("amdgcn", Triple::amdgcn)
.Case("hexagon", Triple::hexagon)
.Case("s390x", Triple::systemz)
.Cases("s390x", "systemz", Triple::systemz)
.Case("sparc", Triple::sparc)
.Case("sparcel", Triple::sparcel)
.Cases("sparcv9", "sparc64", Triple::sparcv9)


@ -267,9 +267,9 @@ defm : int_cond_alias<"neg", 0b0110>;
defm : int_cond_alias<"vc", 0b1111>;
defm : int_cond_alias<"vs", 0b0111>;
defm : fp_cond_alias<"a", 0b0000>;
defm : fp_cond_alias<"", 0b0000>; // same as a; gnu asm, not in manual
defm : fp_cond_alias<"n", 0b1000>;
defm : fp_cond_alias<"a", 0b1000>;
defm : fp_cond_alias<"", 0b1000>; // same as a; gnu asm, not in manual
defm : fp_cond_alias<"n", 0b0000>;
defm : fp_cond_alias<"u", 0b0111>;
defm : fp_cond_alias<"g", 0b0110>;
defm : fp_cond_alias<"ug", 0b0101>;


@ -69,19 +69,15 @@ public:
class X86AsmBackend : public MCAsmBackend {
const StringRef CPU;
bool HasNopl;
uint64_t MaxNopLength;
const uint64_t MaxNopLength;
public:
X86AsmBackend(const Target &T, StringRef CPU) : MCAsmBackend(), CPU(CPU) {
X86AsmBackend(const Target &T, StringRef CPU)
: MCAsmBackend(), CPU(CPU), MaxNopLength(CPU == "slm" ? 7 : 15) {
HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" &&
CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" &&
CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" &&
CPU != "geode" && CPU != "winchip-c6" && CPU != "winchip2" &&
CPU != "c3" && CPU != "c3-2";
// Max length of true long nop instruction is 15 bytes.
// Max length of long nop replacement instruction is 7 bytes.
// Taking into account SilverMont architecture features max length of nops
// is reduced for it to achieve better performance.
MaxNopLength = (!HasNopl || CPU == "slm") ? 7 : 15;
}
unsigned getNumFixupKinds() const override {
@ -299,7 +295,7 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
/// bytes.
/// \return - true on success, false on failure
bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
static const uint8_t TrueNops[10][10] = {
static const uint8_t Nops[10][10] = {
// nop
{0x90},
// xchg %ax,%ax
@ -322,31 +318,17 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
{0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
};
// Alternative nop instructions for CPUs which don't support long nops.
static const uint8_t AltNops[7][10] = {
// nop
{0x90},
// xchg %ax,%ax
{0x66, 0x90},
// lea 0x0(%esi),%esi
{0x8d, 0x76, 0x00},
// lea 0x0(%esi),%esi
{0x8d, 0x74, 0x26, 0x00},
// nop + lea 0x0(%esi),%esi
{0x90, 0x8d, 0x74, 0x26, 0x00},
// lea 0x0(%esi),%esi
{0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00 },
// lea 0x0(%esi),%esi
{0x8d, 0xb4, 0x26, 0x00, 0x00, 0x00, 0x00},
};
// This CPU doesn't support long nops. If needed add more.
// FIXME: Can we get this from the subtarget somehow?
// FIXME: We could generate something better than plain 0x90.
if (!HasNopl) {
for (uint64_t i = 0; i < Count; ++i)
OW->write8(0x90);
return true;
}
// Select the right NOP table.
// FIXME: Can we get if CPU supports long nops from the subtarget somehow?
const uint8_t (*Nops)[10] = HasNopl ? TrueNops : AltNops;
assert(HasNopl || MaxNopLength <= 7);
// Emit as many largest nops as needed, then emit a nop of the remaining
// length.
// 15 is the longest single nop instruction. Emit as many 15-byte nops as
// needed, then emit a nop of the remaining length.
do {
const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
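An illustrative trace (assumed values): with MaxNopLength = 15 and Count =
25, the first iteration picks ThisNopLength = 15, so Prefixes = 5; assuming
the remainder of the loop emits the prefix bytes followed by the matching
table entry, that produces five 0x66 prefixes ahead of the 10-byte nop, and
the second iteration then emits the plain 10-byte entry for the remaining
count.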


@ -192,10 +192,9 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
return 0;
}
static bool isEAXLiveIn(MachineFunction &MF) {
for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
unsigned Reg = II->first;
static bool isEAXLiveIn(MachineBasicBlock &MBB) {
for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
unsigned Reg = RegMask.PhysReg;
if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
Reg == X86::AH || Reg == X86::AL)
@ -261,7 +260,7 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
// load the offset into a register and do one sub/add
unsigned Reg = 0;
if (isSub && !isEAXLiveIn(*MBB.getParent()))
if (isSub && !isEAXLiveIn(MBB))
Reg = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);
else
Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
@ -1133,8 +1132,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF))
AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign);
if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
// Check whether EAX is livein for this function.
bool isEAXAlive = isEAXLiveIn(MF);
// Check whether EAX is livein for this block.
bool isEAXAlive = isEAXLiveIn(MBB);
if (isEAXAlive) {
// Sanity check that EAX is not livein for this function.
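The shrink-wrapping test updated later in this commit
(@use_eax_before_prologue) exercises this distinction: EAX is written before
the prologue, so the per-block liveness query must report it live in the
block where the stack probe is inserted.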


@ -5896,7 +5896,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
def : Pat<(_.EltVT (OpNode (load addr:$src))),
(!cast<Instruction>(NAME#SUFF#Zm)
(_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[OptForSize]>;
(_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512, OptForSize]>;
}
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {


@ -29,6 +29,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
@ -186,32 +187,8 @@ bool PruneEH::SimplifyFunction(Function *F) {
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(F)) {
SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
SmallVector<OperandBundleDef, 1> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
// Insert a call instruction before the invoke.
CallInst *Call = CallInst::Create(II->getCalledValue(), Args, OpBundles,
"", II);
Call->takeName(II);
Call->setCallingConv(II->getCallingConv());
Call->setAttributes(II->getAttributes());
Call->setDebugLoc(II->getDebugLoc());
// Anything that used the value produced by the invoke instruction
// now uses the value produced by the call instruction. Note that we
// do this even for void functions and calls with no uses so that the
// callgraph edge is updated.
II->replaceAllUsesWith(Call);
BasicBlock *UnwindBlock = II->getUnwindDest();
UnwindBlock->removePredecessor(II->getParent());
// Insert a branch to the normal destination right before the
// invoke.
BranchInst::Create(II->getNormalDest(), II);
// Finally, delete the invoke instruction!
BB->getInstList().pop_back();
removeUnwindEdge(&*BB);
// If the unwind block is now dead, nuke it.
if (pred_empty(UnwindBlock))
@ -251,23 +228,39 @@ void PruneEH::DeleteBasicBlock(BasicBlock *BB) {
assert(pred_empty(BB) && "BB is not dead!");
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
Instruction *TokenInst = nullptr;
CallGraphNode *CGN = CG[BB->getParent()];
for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) {
--I;
if (CallInst *CI = dyn_cast<CallInst>(I)) {
if (!isa<IntrinsicInst>(I))
CGN->removeCallEdgeFor(CI);
} else if (InvokeInst *II = dyn_cast<InvokeInst>(I))
CGN->removeCallEdgeFor(II);
if (I->getType()->isTokenTy()) {
TokenInst = &*I;
break;
}
if (auto CS = CallSite (&*I)) {
const Function *Callee = CS.getCalledFunction();
if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID()))
CGN->removeCallEdgeFor(CS);
else if (!Callee->isIntrinsic())
CGN->removeCallEdgeFor(CS);
}
if (!I->use_empty())
I->replaceAllUsesWith(UndefValue::get(I->getType()));
}
// Get the list of successors of this block.
std::vector<BasicBlock*> Succs(succ_begin(BB), succ_end(BB));
if (TokenInst) {
if (!isa<TerminatorInst>(TokenInst))
changeToUnreachable(TokenInst->getNextNode(), /*UseLLVMTrap=*/false);
} else {
// Get the list of successors of this block.
std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
for (unsigned i = 0, e = Succs.size(); i != e; ++i)
Succs[i]->removePredecessor(BB);
for (unsigned i = 0, e = Succs.size(); i != e; ++i)
Succs[i]->removePredecessor(BB);
BB->eraseFromParent();
BB->eraseFromParent();
}
}
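The new PruneEH test added at the end of this commit pins this down: in its
@test1, the cleanuppad (a token-producing instruction) survives and the
branch after it is replaced with unreachable instead of the block being
erased.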


@ -4799,6 +4799,17 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U << "\n");
return;
}
// Bail out if we have a PHI on an EHPad that gets a value from a
// CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is
// no good place to stick any instructions.
if (auto *PN = dyn_cast<PHINode>(U.getUser())) {
auto *FirstNonPHI = PN->getParent()->getFirstNonPHI();
if (isa<FuncletPadInst>(FirstNonPHI) ||
isa<CatchSwitchInst>(FirstNonPHI))
for (BasicBlock *PredBB : PN->blocks())
if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
return;
}
}
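Put differently (an illustrative reading of the bail-out): rewriting such an
IV user would require materializing instructions on the incoming edge from
the catchswitch block, and a catchswitch can neither be split nor have
non-PHI instructions placed before it, so the safe choice is to skip LSR for
the whole loop.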
#ifndef NDEBUG


@ -1409,14 +1409,15 @@ private:
/// different operations.
class LoopVectorizationCostModel {
public:
LoopVectorizationCostModel(Loop *L, PredicatedScalarEvolution &PSE,
LoopInfo *LI, LoopVectorizationLegality *Legal,
LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
LoopVectorizationLegality *Legal,
const TargetTransformInfo &TTI,
const TargetLibraryInfo *TLI, DemandedBits *DB,
AssumptionCache *AC, const Function *F,
const LoopVectorizeHints *Hints)
: TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
AC(AC), TheFunction(F), Hints(Hints) {}
const LoopVectorizeHints *Hints,
SmallPtrSetImpl<const Value *> &ValuesToIgnore)
: TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
TheFunction(F), Hints(Hints), ValuesToIgnore(ValuesToIgnore) {}
/// Information about vectorization costs
struct VectorizationFactor {
@ -1464,9 +1465,6 @@ public:
SmallVector<RegisterUsage, 8>
calculateRegisterUsage(const SmallVector<unsigned, 8> &VFs);
/// Collect values we want to ignore in the cost model.
void collectValuesToIgnore();
private:
/// Returns the expected execution cost. The unit of the cost does
/// not matter because we use the 'cost' units to compare different
@ -1498,8 +1496,8 @@ public:
/// The loop that we evaluate.
Loop *TheLoop;
/// Predicated scalar evolution analysis.
PredicatedScalarEvolution &PSE;
/// Scev analysis.
ScalarEvolution *SE;
/// Loop Info analysis.
LoopInfo *LI;
/// Vectorization legality.
@ -1508,17 +1506,13 @@ public:
const TargetTransformInfo &TTI;
/// Target Library Info.
const TargetLibraryInfo *TLI;
/// Demanded bits analysis.
/// Demanded bits analysis
DemandedBits *DB;
/// Assumption cache.
AssumptionCache *AC;
const Function *TheFunction;
/// Loop Vectorize Hint.
// Loop Vectorize Hint.
const LoopVectorizeHints *Hints;
/// Values to ignore in the cost model.
SmallPtrSet<const Value *, 16> ValuesToIgnore;
/// Values to ignore in the cost model when VF > 1.
SmallPtrSet<const Value *, 16> VecValuesToIgnore;
// Values to ignore in the cost model.
const SmallPtrSetImpl<const Value *> &ValuesToIgnore;
};
/// \brief This holds vectorization requirements that must be verified late in
@ -1763,10 +1757,19 @@ struct LoopVectorize : public FunctionPass {
return false;
}
// Collect values we want to ignore in the cost model. This includes
// type-promoting instructions we identified during reduction detection.
SmallPtrSet<const Value *, 32> ValuesToIgnore;
CodeMetrics::collectEphemeralValues(L, AC, ValuesToIgnore);
for (auto &Reduction : *LVL.getReductionVars()) {
RecurrenceDescriptor &RedDes = Reduction.second;
SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts();
ValuesToIgnore.insert(Casts.begin(), Casts.end());
}
// Use the cost model.
LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, F,
&Hints);
CM.collectValuesToIgnore();
LoopVectorizationCostModel CM(L, PSE.getSE(), LI, &LVL, *TTI, TLI, DB, AC,
F, &Hints, ValuesToIgnore);
// Check the function attributes to find out if this function should be
// optimized for size.
@ -4636,6 +4639,8 @@ void InterleavedAccessInfo::analyzeInterleaving(
// Holds all interleaved store groups temporarily.
SmallSetVector<InterleaveGroup *, 4> StoreGroups;
// Holds all interleaved load groups temporarily.
SmallSetVector<InterleaveGroup *, 4> LoadGroups;
// Search the load-load/write-write pair B-A in bottom-up order and try to
// insert B into the interleave group of A according to 3 rules:
@ -4663,6 +4668,8 @@ void InterleavedAccessInfo::analyzeInterleaving(
if (A->mayWriteToMemory())
StoreGroups.insert(Group);
else
LoadGroups.insert(Group);
for (auto II = std::next(I); II != E; ++II) {
Instruction *B = II->first;
@ -4710,6 +4717,12 @@ void InterleavedAccessInfo::analyzeInterleaving(
for (InterleaveGroup *Group : StoreGroups)
if (Group->getNumMembers() != Group->getFactor())
releaseGroup(Group);
// Remove interleaved load groups that don't have the first and last member.
// This guarantees that we won't do speculative out of bounds loads.
for (InterleaveGroup *Group : LoadGroups)
if (!Group->getMember(0) || !Group->getMember(Group->getFactor() - 1))
releaseGroup(Group);
}
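An illustrative case (not from the patch): with an interleave factor of 2
where the loop reads only A[2*i], member 1 of the group is missing; a wide
load would still fetch the pair <A[2*i], A[2*i+1]>, and on the final
iteration A[2*i+1] can lie one element past the end of the array. Such load
groups are therefore released, as the even_load test change later in this
commit reflects.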
LoopVectorizationCostModel::VectorizationFactor
@ -4734,7 +4747,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
}
// Find the trip count.
unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
unsigned TC = SE->getSmallConstantTripCount(TheLoop);
DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
@ -4936,7 +4949,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
return 1;
// Do not interleave loops with a relatively small trip count.
unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
unsigned TC = SE->getSmallConstantTripCount(TheLoop);
if (TC > 1 && TC < TinyTripCountInterleaveThreshold)
return 1;
@ -5164,15 +5177,15 @@ LoopVectorizationCostModel::calculateRegisterUsage(
// Ignore instructions that are never used within the loop.
if (!Ends.count(I)) continue;
// Skip ignored values.
if (ValuesToIgnore.count(I))
continue;
// Remove all of the instructions that end at this location.
InstrList &List = TransposeEnds[i];
for (unsigned int j = 0, e = List.size(); j < e; ++j)
OpenIntervals.erase(List[j]);
// Skip ignored values.
if (ValuesToIgnore.count(I))
continue;
// For each VF find the maximum usage of registers.
for (unsigned j = 0, e = VFs.size(); j < e; ++j) {
if (VFs[j] == 1) {
@ -5182,12 +5195,8 @@ LoopVectorizationCostModel::calculateRegisterUsage(
// Count the number of live intervals.
unsigned RegUsage = 0;
for (auto Inst : OpenIntervals) {
// Skip ignored values for VF > 1.
if (VecValuesToIgnore.count(Inst))
continue;
for (auto Inst : OpenIntervals)
RegUsage += GetRegUsage(Inst->getType(), VFs[j]);
}
MaxUsages[j] = std::max(MaxUsages[j], RegUsage);
}
@ -5331,7 +5340,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
if (VF > 1 && MinBWs.count(I))
RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
Type *VectorTy = ToVectorTy(RetTy, VF);
auto SE = PSE.getSE();
// TODO: We need to estimate the cost of intrinsic calls.
switch (I->getOpcode()) {
@ -5633,79 +5641,6 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
return false;
}
void LoopVectorizationCostModel::collectValuesToIgnore() {
// Ignore ephemeral values.
CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore);
// Ignore type-promoting instructions we identified during reduction
// detection.
for (auto &Reduction : *Legal->getReductionVars()) {
RecurrenceDescriptor &RedDes = Reduction.second;
SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts();
VecValuesToIgnore.insert(Casts.begin(), Casts.end());
}
// Ignore induction phis that are only used in either GetElementPtr or ICmp
// instruction to exit loop. Induction variables usually have large types and
// can have big impact when estimating register usage.
// This is for when VF > 1.
for (auto &Induction : *Legal->getInductionVars()) {
auto *PN = Induction.first;
auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch());
// Check that the PHI is only used by the induction increment (UpdateV) or
// by GEPs. Then check that UpdateV is only used by a compare instruction or
// the loop header PHI.
// FIXME: Need precise def-use analysis to determine if this instruction
// variable will be vectorized.
if (std::all_of(PN->user_begin(), PN->user_end(),
[&](const User *U) -> bool {
return U == UpdateV || isa<GetElementPtrInst>(U);
}) &&
std::all_of(UpdateV->user_begin(), UpdateV->user_end(),
[&](const User *U) -> bool {
return U == PN || isa<ICmpInst>(U);
})) {
VecValuesToIgnore.insert(PN);
VecValuesToIgnore.insert(UpdateV);
}
}
// Ignore instructions that will not be vectorized.
// This is for when VF > 1.
for (auto bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be;
++bb) {
for (auto &Inst : **bb) {
switch (Inst.getOpcode()) {
case Instruction::GetElementPtr: {
// Ignore GEP if its last operand is an induction variable so that it is
// a consecutive load/store and won't be vectorized as scatter/gather
// pattern.
GetElementPtrInst *Gep = cast<GetElementPtrInst>(&Inst);
unsigned NumOperands = Gep->getNumOperands();
unsigned InductionOperand = getGEPInductionOperand(Gep);
bool GepToIgnore = true;
// Check that all of the gep indices are uniform except for the
// induction operand.
for (unsigned i = 0; i != NumOperands; ++i) {
if (i != InductionOperand &&
!PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)),
TheLoop)) {
GepToIgnore = false;
break;
}
}
if (GepToIgnore)
VecValuesToIgnore.insert(&Inst);
break;
}
}
}
}
}
void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
bool IfPredicateStore) {


@ -0,0 +1,23 @@
; RUN: llc -o - %s | FileCheck %s
; Check that selection dag legalization of fcopysign works in cases with
; different modes for the arguments.
target triple = "aarch64--"
declare fp128 @llvm.copysign.f128(fp128, fp128)
@val = global double zeroinitializer, align 8
; CHECK-LABEL: copysign0
; CHECK: ldr [[REG:x[0-9]+]], [x8, :lo12:val]
; CHECK: and [[ANDREG:x[0-9]+]], [[REG]], #0x8000000000000000
; CHECK: lsr x[[LSRREGNUM:[0-9]+]], [[ANDREG]], #56
; CHECK: bfxil w[[LSRREGNUM]], w{{[0-9]+}}, #0, #7
; CHECK: strb w[[LSRREGNUM]],
; CHECK: ldr q{{[0-9]+}},
define fp128 @copysign0() {
entry:
%v = load double, double* @val, align 8
%conv = fpext double %v to fp128
%call = tail call fp128 @llvm.copysign.f128(fp128 0xL00000000000000007FFF000000000000, fp128 %conv) #2
ret fp128 %call
}


@ -0,0 +1,80 @@
; RUN: sed -e s/.Cxx:// %s | llc -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefix=CXX
; RUN: sed -e s/.Seh:// %s | llc -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefix=SEH
declare i32 @__CxxFrameHandler3(...)
declare i32 @__C_specific_handler(...)
declare void @dummy_filter()
declare void @f(i32)
;Cxx: define void @test() personality i32 (...)* @__CxxFrameHandler3 {
;Seh: define void @test() personality i32 (...)* @__C_specific_handler {
entry:
invoke void @f(i32 1)
to label %invoke.cont unwind label %catch.dispatch
catch.dispatch:
%cs1 = catchswitch within none [label %catch.body] unwind label %catch.dispatch.2
catch.body:
;Cxx: %catch = catchpad within %cs1 [i8* null, i32 u0x40, i8* null]
;Seh: %catch = catchpad within %cs1 [void ()* @dummy_filter]
invoke void @f(i32 2) [ "funclet"(token %catch) ]
to label %unreachable unwind label %terminate
terminate:
%cleanup = cleanuppad within %catch []
call void @f(i32 3) [ "funclet"(token %cleanup) ]
unreachable
unreachable:
unreachable
invoke.cont:
ret void
catch.dispatch.2:
%cs2 = catchswitch within none [label %catch.body.2] unwind to caller
catch.body.2:
;Cxx: %catch2 = catchpad within %cs2 [i8* null, i32 u0x40, i8* null]
;Seh: %catch2 = catchpad within %cs2 [void ()* @dummy_filter]
unreachable
}
; CXX-LABEL: test:
; CXX-LABEL: $ip2state$test:
; CXX-NEXT: .long .Lfunc_begin0@IMGREL
; CXX-NEXT: .long -1
; CXX-NEXT: .long .Ltmp0@IMGREL+1
; CXX-NEXT: .long 1
; CXX-NEXT: .long .Ltmp1@IMGREL+1
; CXX-NEXT: .long -1
; CXX-NEXT: .long "?catch$3@?0?test@4HA"@IMGREL
; CXX-NEXT: .long 2
; CXX-NEXT: .long .Ltmp2@IMGREL+1
; CXX-NEXT: .long 3
; CXX-NEXT: .long .Ltmp3@IMGREL+1
; CXX-NEXT: .long 2
; CXX-NEXT: .long "?catch$5@?0?test@4HA"@IMGREL
; CXX-NEXT: .long 4
; SEH-LABEL: test:
; SEH-LABEL: .Llsda_begin0:
; SEH-NEXT: .long .Ltmp0@IMGREL+1
; SEH-NEXT: .long .Ltmp1@IMGREL+1
; SEH-NEXT: .long dummy_filter@IMGREL
; SEH-NEXT: .long .LBB0_3@IMGREL
; SEH-NEXT: .long .Ltmp0@IMGREL+1
; SEH-NEXT: .long .Ltmp1@IMGREL+1
; SEH-NEXT: .long dummy_filter@IMGREL
; SEH-NEXT: .long .LBB0_5@IMGREL
; SEH-NEXT: .long .Ltmp2@IMGREL+1
; SEH-NEXT: .long .Ltmp3@IMGREL+1
; SEH-NEXT: .long "?dtor$2@?0?test@4HA"@IMGREL
; SEH-NEXT: .long 0
; SEH-NEXT: .long .Ltmp2@IMGREL+1
; SEH-NEXT: .long .Ltmp3@IMGREL+1
; SEH-NEXT: .long dummy_filter@IMGREL
; SEH-NEXT: .long .LBB0_5@IMGREL
; SEH-NEXT: .Llsda_end0:


@ -0,0 +1,20 @@
; RUN: llc < %s -mcpu=i686 2>&1 | FileCheck %s
; PR26625
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i386"
define float @x0(float %f) #0 {
entry:
%call = tail call float @sqrtf(float %f) #1
ret float %call
; CHECK-LABEL: x0:
; CHECK: flds
; CHECK-NEXT: fsqrt
; CHECK-NOT: vsqrtss
}
declare float @sqrtf(float) #0
attributes #0 = { nounwind optsize readnone }
attributes #1 = { nounwind optsize readnone }


@ -0,0 +1,75 @@
; RUN: llc -regalloc=greedy -mtriple=x86_64-pc-windows-msvc < %s -o - | FileCheck %s
; This test checks for proper handling of a condition where the greedy register
; allocator encounters a very short interval that contains no uses but does
; contain an EH pad unwind edge, which requires spilling. Previously the
; register allocator marked an interval like this as unspillable, resulting in
; a compilation failure.
; The following checks that the value %p is reloaded within the catch handler.
; CHECK-LABEL: "?catch$8@?0?test@4HA":
; CHECK: .seh_endprologue
; CHECK: movq -16(%rbp), %rax
; CHECK: movb $0, (%rax)
define i32* @test(i32* %a) personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
entry:
%call = call i32 @f()
%p = bitcast i32* %a to i8*
br i1 undef, label %if.end, label %if.else
if.else: ; preds = %entry
br i1 undef, label %cond.false.i, label %if.else.else
if.else.else: ; preds = %if.else
br i1 undef, label %cond.true.i, label %cond.false.i
cond.true.i: ; preds = %if.else.else
br label %invoke.cont
cond.false.i: ; preds = %if.else.else, %if.else
%call.i = invoke i32 @f()
to label %invoke.cont unwind label %catch.dispatch
catch.dispatch: ; preds = %cond.false.i
%tmp0 = catchswitch within none [label %catch] unwind label %ehcleanup
catch: ; preds = %catch.dispatch
%tmp1 = catchpad within %tmp0 [i8* null, i32 64, i8* null]
%p.0 = getelementptr inbounds i8, i8* %p, i64 0
store i8 0, i8* %p.0, align 8
invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) [ "funclet"(token %tmp1) ]
to label %noexc unwind label %ehcleanup
noexc: ; preds = %catch
unreachable
invoke.cont: ; preds = %cond.false.i, %cond.true.i
%cond.i = phi i32 [ %call, %cond.true.i ], [ %call.i, %cond.false.i ]
%cmp = icmp eq i32 %cond.i, -1
%tmp3 = select i1 %cmp, i32 4, i32 0
br label %if.end
if.end: ; preds = %invoke.cont, %entry
%state.0 = phi i32 [ %tmp3, %invoke.cont ], [ 4, %entry ]
%p.1 = getelementptr inbounds i8, i8* %p, i64 0
invoke void @g(i8* %p.1, i32 %state.0)
to label %invoke.cont.1 unwind label %ehcleanup
invoke.cont.1: ; preds = %if.end
ret i32* %a
ehcleanup: ; preds = %if.end, %catch, %catch.dispatch
%tmp4 = cleanuppad within none []
cleanupret from %tmp4 unwind to caller
}
%eh.ThrowInfo = type { i32, i32, i32, i32 }
declare i32 @__CxxFrameHandler3(...)
declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
declare i32 @f()
declare void @g(i8*, i32)


@ -1,6 +1,8 @@
; RUN: llc < %s -enable-shrink-wrap=true | FileCheck %s
; chkstk cannot come before the usual prologue, since it adjusts ESP.
; If chkstk is used in the prologue, we also have to be careful about preserving
; EAX if it is used.
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i686-pc-windows-msvc18.0.0"
@ -35,3 +37,36 @@ bb2:
; CHECK: retl
declare void @inalloca_params(<{ %struct.S }>* inalloca)
declare i32 @doSomething(i32, i32*)
; In this test case, we force usage of EAX before the prologue, and have to
; compensate before calling __chkstk. It would also be valid for us to avoid
; shrink wrapping in this case.
define x86_fastcallcc i32 @use_eax_before_prologue(i32 inreg %a, i32 inreg %b) {
%tmp = alloca i32, i32 1024, align 4
%tmp2 = icmp slt i32 %a, %b
br i1 %tmp2, label %true, label %false
true:
store i32 %a, i32* %tmp, align 4
%tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
br label %false
false:
%tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
ret i32 %tmp.0
}
; CHECK-LABEL: @use_eax_before_prologue@8: # @use_eax_before_prologue
; CHECK: movl %ecx, %eax
; CHECK: cmpl %edx, %eax
; CHECK: jge LBB1_2
; CHECK: pushl %eax
; CHECK: movl $4100, %eax
; CHECK: calll __chkstk
; CHECK: movl 4100(%esp), %eax
; CHECK: calll _doSomething
; CHECK: LBB1_2:
; CHECK: retl


@ -124,6 +124,18 @@
! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22
bvs .BB0
! CHECK: fba .BB0 ! encoding: [0x11,0b10AAAAAA,A,A]
! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22
fba .BB0
! CHECK: fba .BB0 ! encoding: [0x11,0b10AAAAAA,A,A]
! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22
fb .BB0
! CHECK: fbn .BB0 ! encoding: [0x01,0b10AAAAAA,A,A]
! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22
fbn .BB0
! CHECK: fbu .BB0 ! encoding: [0x0f,0b10AAAAAA,A,A]
! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22
fbu .BB0
@ -131,6 +143,7 @@
! CHECK: fbg .BB0 ! encoding: [0x0d,0b10AAAAAA,A,A]
! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22
fbg .BB0
! CHECK: fbug .BB0 ! encoding: [0x0b,0b10AAAAAA,A,A]
! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22
fbug .BB0


@ -770,6 +770,18 @@
! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br19
bpos,a,pt %xcc, .BB0
! CHECK: fba %fcc0, .BB0 ! encoding: [0x11,0b01001AAA,A,A]
! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br19
fba %fcc0, .BB0
! CHECK: fba %fcc0, .BB0 ! encoding: [0x11,0b01001AAA,A,A]
! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br19
fb %fcc0, .BB0
! CHECK: fbn %fcc0, .BB0 ! encoding: [0x01,0b01001AAA,A,A]
! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br19
fbn %fcc0, .BB0
! CHECK: fbu %fcc0, .BB0 ! encoding: [0x0f,0b01001AAA,A,A]
! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br19
fbu %fcc0, .BB0


@ -22,7 +22,13 @@ inc %eax
inc %eax
// CHECK: 0: 40 incl %eax
// CHECK: 1: 8d b4 26 00 00 00 00 leal (%esi), %esi
// CHECK: 1: 90 nop
// CHECK: 2: 90 nop
// CHECK: 3: 90 nop
// CHECK: 4: 90 nop
// CHECK: 5: 90 nop
// CHECK: 6: 90 nop
// CHECK: 7: 90 nop
// CHECK: 8: 40 incl %eax


@ -61,6 +61,15 @@ define void @test_simplify4(%FILE* %fp) {
; CHECK-IPRINTF-NEXT: ret void
}
define void @test_simplify5(%FILE* %fp) {
; CHECK-LABEL: @test_simplify5(
%fmt = getelementptr [13 x i8], [13 x i8]* @hello_world, i32 0, i32 0
call i32 (%FILE*, i8*, ...) @fprintf(%FILE* %fp, i8* %fmt) [ "deopt"() ]
; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @hello_world, i32 0, i32 0), i32 12, i32 1, %FILE* %fp) [ "deopt"() ]
ret void
; CHECK-NEXT: ret void
}
define void @test_no_simplify1(%FILE* %fp) {
; CHECK-IPRINTF-LABEL: @test_no_simplify1(
%fmt = getelementptr [3 x i8], [3 x i8]* @percent_f, i32 0, i32 0


@ -214,3 +214,32 @@ try.cont.7: ; preds = %try.cont
; CHECK: catch.dispatch.2:
; CHECK: %e.0 = phi i32* [ %c, %try.cont ], [ %b, %catch.dispatch ]
define i32 @test2() personality i32 (...)* @_except_handler3 {
entry:
br label %for.body
for.body: ; preds = %for.inc, %entry
%phi = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
invoke void @reserve()
to label %for.inc unwind label %catch.dispatch
catch.dispatch: ; preds = %for.body
%tmp18 = catchswitch within none [label %catch.handler] unwind to caller
catch.handler: ; preds = %catch.dispatch
%phi.lcssa = phi i32 [ %phi, %catch.dispatch ]
%tmp19 = catchpad within %tmp18 [i8* null]
catchret from %tmp19 to label %done
done:
ret i32 %phi.lcssa
for.inc: ; preds = %for.body
%inc = add i32 %phi, 1
br label %for.body
}
; CHECK-LABEL: define i32 @test2(
; CHECK: %phi.lcssa = phi i32 [ %phi, %catch.dispatch ]
; CHECK-NEXT: catchpad within


@ -16,9 +16,15 @@ for.cond.cleanup: ; preds = %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%0 = shl nsw i64 %indvars.iv, 1
%odd.idx = add nsw i64 %0, 1
%arrayidx = getelementptr inbounds double, double* %b, i64 %0
%arrayidx.odd = getelementptr inbounds double, double* %b, i64 %odd.idx
%1 = load double, double* %arrayidx, align 8
%add = fadd double %1, 1.000000e+00
%2 = load double, double* %arrayidx.odd, align 8
%add = fadd double %1, %2
%arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
store double %add, double* %arrayidx2, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1


@ -1,71 +0,0 @@
; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -S 2>&1 | FileCheck %s
; REQUIRES: asserts
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@a = global [1024 x i8] zeroinitializer, align 16
@b = global [1024 x i8] zeroinitializer, align 16
define i32 @foo() {
; This function has a loop of SAD pattern. Here we check when VF = 16 the
; register usage doesn't exceed 16.
;
; CHECK-LABEL: foo
; CHECK: LV(REG): VF = 4
; CHECK-NEXT: LV(REG): Found max usage: 4
; CHECK: LV(REG): VF = 8
; CHECK-NEXT: LV(REG): Found max usage: 7
; CHECK: LV(REG): VF = 16
; CHECK-NEXT: LV(REG): Found max usage: 13
entry:
br label %for.body
for.cond.cleanup:
%add.lcssa = phi i32 [ %add, %for.body ]
ret i32 %add.lcssa
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %indvars.iv
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %indvars.iv
%1 = load i8, i8* %arrayidx2, align 1
%conv3 = zext i8 %1 to i32
%sub = sub nsw i32 %conv, %conv3
%ispos = icmp sgt i32 %sub, -1
%neg = sub nsw i32 0, %sub
%2 = select i1 %ispos, i32 %sub, i32 %neg
%add = add nsw i32 %2, %s.015
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
define i64 @bar(i64* nocapture %a) {
; CHECK-LABEL: bar
; CHECK: LV(REG): VF = 2
; CHECK: LV(REG): Found max usage: 4
;
entry:
br label %for.body
for.cond.cleanup:
%add2.lcssa = phi i64 [ %add2, %for.body ]
ret i64 %add2.lcssa
for.body:
%i.012 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%s.011 = phi i64 [ 0, %entry ], [ %add2, %for.body ]
%arrayidx = getelementptr inbounds i64, i64* %a, i64 %i.012
%0 = load i64, i64* %arrayidx, align 8
%add = add nsw i64 %0, %i.012
store i64 %add, i64* %arrayidx, align 8
%add2 = add nsw i64 %add, %s.011
%inc = add nuw nsw i64 %i.012, 1
%exitcond = icmp eq i64 %inc, 1024
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}


@ -16,7 +16,7 @@ target triple = "x86_64-unknown-linux-gnu"
; -vectorizer-maximize-bandwidth is indicated.
;
; CHECK-label: foo
; CHECK: LV: Selecting VF: 32.
; CHECK: LV: Selecting VF: 16.
define void @foo() {
entry:
br label %for.body


@ -292,10 +292,8 @@ for.body: ; preds = %for.body, %entry
; }
; CHECK-LABEL: @even_load(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NOT: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: shl nsw <4 x i32> %strided.vec, <i32 1, i32 1, i32 1, i32 1>
; CHECK-NOT: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK-NOT: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
define void @even_load(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
entry:


@ -0,0 +1,56 @@
; RUN: opt -prune-eh -S < %s | FileCheck %s
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i386-pc-windows-msvc"
declare void @neverthrows() nounwind
define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
invoke void @neverthrows()
to label %try.cont unwind label %cleanuppad
try.cont:
ret void
cleanuppad:
%cp = cleanuppad within none []
br label %cleanupret
cleanupret:
cleanupret from %cp unwind to caller
}
; CHECK-LABEL: define void @test1(
; CHECK: call void @neverthrows()
; CHECK: %[[cp:.*]] = cleanuppad within none []
; CHECK-NEXT: unreachable
; CHECK: cleanupret from %[[cp]] unwind to caller
define void @test2() personality i32 (...)* @__CxxFrameHandler3 {
invoke void @neverthrows()
to label %try.cont unwind label %catchswitch
try.cont:
ret void
catchswitch:
%cs = catchswitch within none [label %catchpad] unwind to caller
catchpad:
%cp = catchpad within %cs []
unreachable
ret:
ret void
}
; CHECK-LABEL: define void @test2(
; CHECK: call void @neverthrows()
; CHECK: %[[cs:.*]] = catchswitch within none [label
; CHECK: catchpad within %[[cs]] []
; CHECK-NEXT: unreachable
declare i32 @__CxxFrameHandler3(...)