Vendor import of llvm RELEASE_360/rc4 tag r229772 (effectively, 3.6.0 RC4):

https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_360/rc4@229772
dim 2015-02-19 20:55:17 +00:00
parent 85d2764eab
commit 49b6407b6c
36 changed files with 643 additions and 123 deletions

View File

@@ -5,11 +5,6 @@ LLVM 3.6 Release Notes
 .. contents::
     :local:

-.. warning::
-   These are in-progress notes for the upcoming LLVM 3.6 release.  You may
-   prefer the `LLVM 3.5 Release Notes <http://llvm.org/releases/3.5.0/docs
-   /ReleaseNotes.html>`_.
-
 Introduction
 ============
@@ -26,10 +21,6 @@ have questions or comments, the `LLVM Developer's Mailing List
 <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ is a good place to send
 them.

-Note that if you are reading this file from a Subversion checkout or the main
-LLVM web page, this document applies to the *next* release, not the current
-one.  To see the release notes for a specific release, please see the `releases
-page <http://llvm.org/releases/>`_.
-
 Non-comprehensive list of changes in this release
 =================================================
@@ -544,6 +535,33 @@ new LLVM-based code generators "on the fly" for the designed processors and
 loads them in to the compiler backend as runtime libraries to avoid
 per-target recompilation of larger parts of the compiler chain.

+Likely
+------
+
+`Likely <http://www.liblikely.org>`_ is an embeddable just-in-time Lisp for
+image recognition and heterogenous computing. Algorithms are just-in-time
+compiled using LLVM's MCJIT infrastructure to execute on single or
+multi-threaded CPUs and potentially OpenCL SPIR or CUDA enabled GPUs.
+Likely seeks to explore new optimizations for statistical learning
+algorithms by moving them from an offline model generation step to the
+compile-time evaluation of a function (the learning algorithm) with constant
+arguments (the training data).
+
+LDC - the LLVM-based D compiler
+-------------------------------
+
+`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It
+pragmatically combines efficiency, control, and modeling power, with safety and
+programmer productivity. D supports powerful concepts like Compile-Time Function
+Execution (CTFE) and Template Meta-Programming, provides an innovative approach
+to concurrency and offers many classical paradigms.
+
+`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler
+combined with LLVM as backend to produce efficient native code. LDC targets
+x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on
+PowerPC (32/64 bit). Ports to other architectures like ARM, AArch64 and MIPS64
+are underway.
+
 Additional Information
 ======================

View File

@@ -1,11 +1,6 @@
 Overview
 ========

-.. warning::
-
-   If you are using a released version of LLVM, see `the download page
-   <http://llvm.org/releases/>`_ to find your documentation.
-
 The LLVM compiler infrastructure supports a wide range of projects, from
 industrial strength compilers to specialized JIT applications to small
 research projects.

View File

@@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS
   ExecutionEngine
   InstCombine
   MC
+  RuntimeDyld
   ScalarOpts
   Support
   native

View File

@@ -867,9 +867,11 @@ class SelectionDAG {
                           SDValue Offset, ISD::MemIndexedMode AM);
   SDValue getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr,
-                        SDValue Mask, SDValue Src0, MachineMemOperand *MMO);
+                        SDValue Mask, SDValue Src0, EVT MemVT,
+                        MachineMemOperand *MMO, ISD::LoadExtType);
   SDValue getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
-                         SDValue Ptr, SDValue Mask, MachineMemOperand *MMO);
+                         SDValue Ptr, SDValue Mask, EVT MemVT,
+                         MachineMemOperand *MMO, bool IsTrunc);

   /// getSrcValue - Construct a node to track a Value* through the backend.
   SDValue getSrcValue(const Value *v);

View File

@@ -1970,13 +1970,17 @@ class MaskedLoadStoreSDNode : public MemSDNode {
 class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
 public:
   friend class SelectionDAG;
-  MaskedLoadSDNode(unsigned Order, DebugLoc dl,
-                   SDValue *Operands, unsigned numOperands,
-                   SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+  MaskedLoadSDNode(unsigned Order, DebugLoc dl, SDValue *Operands,
+                   unsigned numOperands, SDVTList VTs, ISD::LoadExtType ETy,
+                   EVT MemVT, MachineMemOperand *MMO)
     : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, Operands, numOperands,
-                            VTs, MemVT, MMO)
-  {}
+                            VTs, MemVT, MMO) {
+    SubclassData |= (unsigned short)ETy;
+  }
+
+  ISD::LoadExtType getExtensionType() const {
+    return ISD::LoadExtType(SubclassData & 3);
+  }

   const SDValue &getSrc0() const { return getOperand(3); }
   static bool classof(const SDNode *N) {
     return N->getOpcode() == ISD::MLOAD;
@@ -1989,14 +1993,19 @@ class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
 public:
   friend class SelectionDAG;
-  MaskedStoreSDNode(unsigned Order, DebugLoc dl,
-                    SDValue *Operands, unsigned numOperands,
-                    SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+  MaskedStoreSDNode(unsigned Order, DebugLoc dl, SDValue *Operands,
+                    unsigned numOperands, SDVTList VTs, bool isTrunc, EVT MemVT,
+                    MachineMemOperand *MMO)
     : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, Operands, numOperands,
-                            VTs, MemVT, MMO)
-  {}
+                            VTs, MemVT, MMO) {
+    SubclassData |= (unsigned short)isTrunc;
+  }
+
+  /// isTruncatingStore - Return true if the op does a truncation before store.
+  /// For integers this is the same as doing a TRUNCATE and storing the result.
+  /// For floats, it is the same as doing an FP_ROUND and storing the result.
+  bool isTruncatingStore() const { return SubclassData & 1; }

-  const SDValue &getData() const { return getOperand(3); }
+  const SDValue &getValue() const { return getOperand(3); }
   static bool classof(const SDNode *N) {
     return N->getOpcode() == ISD::MSTORE;
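Both constructors pack their new flag into spare bits of MemSDNode's SubclassData rather than adding a member: the load keeps a two-bit ISD::LoadExtType, the store a one-bit truncation flag. A minimal self-contained sketch of that bit-packing idiom (plain C++; the names mirror the diff, but nothing here is LLVM API):

    #include <cassert>
    #include <cstdint>
    #include <iostream>

    // Mirrors ISD::LoadExtType: two bits cover all four kinds.
    enum LoadExtType : uint16_t { NON_EXTLOAD = 0, EXTLOAD = 1, SEXTLOAD = 2, ZEXTLOAD = 3 };

    struct NodeFlags {
      uint16_t SubclassData = 0;  // spare bits shared by subclasses

      void setExtType(LoadExtType ETy) { SubclassData |= (uint16_t)ETy; }
      LoadExtType getExtensionType() const { return LoadExtType(SubclassData & 3); }

      void setTruncating(bool IsTrunc) { SubclassData |= (uint16_t)IsTrunc; }
      bool isTruncatingStore() const { return SubclassData & 1; }
    };

    int main() {
      NodeFlags Load;
      Load.setExtType(SEXTLOAD);
      assert(Load.getExtensionType() == SEXTLOAD);

      NodeFlags Store;
      Store.setTruncating(true);
      std::cout << Store.isTruncatingStore() << "\n";  // prints 1
    }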

View File

@@ -6,9 +6,6 @@
 /* Exported configuration */
 #include "llvm/Config/llvm-config.h"

-/* Patch version of the LLVM API */
-#cmakedefine LLVM_VERSION_PATCH ${LLVM_VERSION_PATCH}
-
 /* Bug report URL. */
 #define BUG_REPORT_URL "${BUG_REPORT_URL}"

View File

@@ -87,10 +87,13 @@
 #cmakedefine LLVM_USE_OPROFILE 1

 /* Major version of the LLVM API */
-#cmakedefine LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR}
+#define LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR}

 /* Minor version of the LLVM API */
-#cmakedefine LLVM_VERSION_MINOR ${LLVM_VERSION_MINOR}
+#define LLVM_VERSION_MINOR ${LLVM_VERSION_MINOR}
+
+/* Patch version of the LLVM API */
+#define LLVM_VERSION_PATCH ${LLVM_VERSION_PATCH}

 /* LLVM version string */
 #define LLVM_VERSION_STRING "${PACKAGE_VERSION}"

View File

@@ -92,6 +92,9 @@
 /* Minor version of the LLVM API */
 #undef LLVM_VERSION_MINOR

+/* Patch version of the LLVM API */
+#undef LLVM_VERSION_PATCH
+
 /* LLVM version string */
 #undef LLVM_VERSION_STRING

View File

@@ -325,6 +325,9 @@ class ConstantAggregateZero : public Constant {
   /// index.
   Constant *getElementValue(unsigned Idx) const;

+  /// \brief Return the number of elements in the array, vector, or struct.
+  unsigned getNumElements() const;
+
   /// Methods for support type inquiry through isa, cast, and dyn_cast:
   ///
   static bool classof(const Value *V) {
@@ -1196,6 +1199,9 @@ class UndefValue : public Constant {
   /// index.
   UndefValue *getElementValue(unsigned Idx) const;

+  /// \brief Return the number of elements in the array, vector, or struct.
+  unsigned getNumElements() const;
+
   void destroyConstant() override;

   /// Methods for support type inquiry through isa, cast, and dyn_cast:

View File

@@ -538,9 +538,17 @@ Metadata *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) {
   if (Metadata *MD = MDValuePtrs[Idx])
     return MD;

-  // Create and return a placeholder, which will later be RAUW'd.
-  AnyFwdRefs = true;
+  // Track forward refs to be resolved later.
+  if (AnyFwdRefs) {
+    MinFwdRef = std::min(MinFwdRef, Idx);
+    MaxFwdRef = std::max(MaxFwdRef, Idx);
+  } else {
+    AnyFwdRefs = true;
+    MinFwdRef = MaxFwdRef = Idx;
+  }
   ++NumFwdRefs;
+
+  // Create and return a placeholder, which will later be RAUW'd.
   Metadata *MD = MDNode::getTemporary(Context, None);
   MDValuePtrs[Idx].reset(MD);
   return MD;
@@ -556,11 +564,15 @@ void BitcodeReaderMDValueList::tryToResolveCycles() {
     return;

   // Resolve any cycles.
-  for (auto &MD : MDValuePtrs) {
+  for (unsigned I = MinFwdRef, E = MaxFwdRef + 1; I != E; ++I) {
+    auto &MD = MDValuePtrs[I];
     assert(!(MD && isa<MDNodeFwdDecl>(MD)) && "Unexpected forward reference");
     if (auto *N = dyn_cast_or_null<UniquableMDNode>(MD))
       N->resolveCycles();
   }
+
+  // Make sure we return early again until there's another forward ref.
+  AnyFwdRefs = false;
 }

 Type *BitcodeReader::getTypeByID(unsigned ID) {
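The point of MinFwdRef/MaxFwdRef is that tryToResolveCycles no longer walks every metadata slot, only the index window that ever held a forward reference, and then resets AnyFwdRefs so later calls return early. A self-contained sketch of that window-tracking pattern (illustrative names, not the LLVM API):

    #include <algorithm>
    #include <iostream>
    #include <vector>

    struct FwdRefWindow {
      bool AnyFwdRefs = false;
      unsigned MinFwdRef = 0, MaxFwdRef = 0;

      // Record that slot Idx received a placeholder.
      void noteFwdRef(unsigned Idx) {
        if (AnyFwdRefs) {
          MinFwdRef = std::min(MinFwdRef, Idx);
          MaxFwdRef = std::max(MaxFwdRef, Idx);
        } else {
          AnyFwdRefs = true;
          MinFwdRef = MaxFwdRef = Idx;
        }
      }

      // Visit only slots that could contain a forward reference.
      template <typename Fn> void resolve(std::vector<int> &Slots, Fn Visit) {
        if (!AnyFwdRefs)
          return;                    // nothing outstanding: early exit
        for (unsigned I = MinFwdRef, E = MaxFwdRef + 1; I != E; ++I)
          Visit(Slots[I]);
        AnyFwdRefs = false;          // reset until the next forward ref
      }
    };

    int main() {
      std::vector<int> Slots(1000, 0);
      FwdRefWindow W;
      W.noteFwdRef(10);
      W.noteFwdRef(42);
      unsigned Visited = 0;
      W.resolve(Slots, [&](int &) { ++Visited; });
      std::cout << Visited << "\n";  // 33 slots scanned instead of 1000
    }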

View File

@@ -99,6 +99,8 @@ class BitcodeReaderValueList {
 class BitcodeReaderMDValueList {
   unsigned NumFwdRefs;
   bool AnyFwdRefs;
+  unsigned MinFwdRef;
+  unsigned MaxFwdRef;
   std::vector<TrackingMDRef> MDValuePtrs;

   LLVMContext &Context;

View File

@@ -4842,7 +4842,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
   SDValue Mask = MST->getMask();
-  SDValue Data = MST->getData();
+  SDValue Data = MST->getValue();
   SDLoc DL(N);

   // If the MSTORE data type requires splitting and the mask is provided by a
@@ -4885,7 +4885,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
                           MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
                           Alignment, MST->getAAInfo(), MST->getRanges());

-    Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, MMO);
+    Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
+                            MST->isTruncatingStore());

     unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
@@ -4897,7 +4898,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
                           SecondHalfAlignment, MST->getAAInfo(),
                           MST->getRanges());

-    Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, MMO);
+    Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
+                            MST->isTruncatingStore());

     AddToWorklist(Lo.getNode());
     AddToWorklist(Hi.getNode());
@@ -4958,7 +4960,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
                           MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
                           Alignment, MLD->getAAInfo(), MLD->getRanges());

-    Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, MMO);
+    Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
+                           ISD::NON_EXTLOAD);

     unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
@@ -4969,7 +4972,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
                           MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
                           SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());

-    Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, MMO);
+    Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
+                           ISD::NON_EXTLOAD);

     AddToWorklist(Lo.getNode());
     AddToWorklist(Hi.getNode());
@@ -9482,6 +9486,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+    // The narrowing should be profitable, the load/store operation should be
+    // legal (or custom) and the store size should be equal to the NewVT width.
     while (NewBW < BitWidth &&
            !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
              TLI.isNarrowingProfitable(VT, NewVT))) {
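The annotated loop looks for the smallest power-of-two width, starting from the bits the store actually touches, that the target considers legal and profitable. A standalone model of that search (the legality predicate here is a stand-in for the TLI queries; NextPowerOf2 follows the MathExtras definition):

    #include <cstdint>
    #include <iostream>

    // Smallest power of two strictly greater than V, as in llvm/Support/MathExtras.h.
    static uint64_t NextPowerOf2(uint64_t V) {
      V |= V >> 1;  V |= V >> 2;  V |= V >> 4;
      V |= V >> 8;  V |= V >> 16; V |= V >> 32;
      return V + 1;
    }

    int main() {
      unsigned BitWidth = 64, MSB = 37, ShAmt = 16;
      // Hypothetical target: only 8/16/32-bit narrow ops are worthwhile.
      auto ProfitableAndLegal = [](unsigned BW) { return BW == 8 || BW == 16 || BW == 32; };

      unsigned NewBW = NextPowerOf2(MSB - ShAmt);     // covers bits [ShAmt, MSB]
      while (NewBW < BitWidth && !ProfitableAndLegal(NewBW))
        NewBW = NextPowerOf2(NewBW);                  // try the next wider type
      if (NewBW < BitWidth)
        std::cout << "narrow to i" << NewBW << "\n";  // prints "narrow to i32"
    }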

View File

@@ -458,16 +458,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
 SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0());
-  SDValue ExtMask = PromoteTargetBoolean(N->getMask(), NVT);
+
+  SDValue Mask = N->getMask();
+  EVT NewMaskVT = getSetCCResultType(NVT);
+  if (NewMaskVT != N->getMask().getValueType())
+    Mask = PromoteTargetBoolean(Mask, NewMaskVT);
   SDLoc dl(N);

-  MachineMemOperand *MMO = DAG.getMachineFunction().
-    getMachineMemOperand(N->getPointerInfo(),
-                         MachineMemOperand::MOLoad, NVT.getStoreSize(),
-                         N->getAlignment(), N->getAAInfo(), N->getRanges());
-
   SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
-                                  ExtMask, ExtSrc0, MMO);
+                                  Mask, ExtSrc0, N->getMemoryVT(),
+                                  N->getMemOperand(), ISD::SEXTLOAD);
   // Legalized the chain result - switch anything that used the old chain to
   // use the new one.
   ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -1117,16 +1117,18 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
 SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){

   assert(OpNo == 2 && "Only know how to promote the mask!");
-  SDValue DataOp = N->getData();
+  SDValue DataOp = N->getValue();
   EVT DataVT = DataOp.getValueType();
   SDValue Mask = N->getMask();
   EVT MaskVT = Mask.getValueType();
   SDLoc dl(N);

+  bool TruncateStore = false;
   if (!TLI.isTypeLegal(DataVT)) {
     if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) {
       DataOp = GetPromotedInteger(DataOp);
       Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
+      TruncateStore = true;
     }
     else {
       assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector &&
@@ -1156,10 +1158,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpN
   }
   else
     Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType());
-  SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
-  NewOps[2] = Mask;
-  NewOps[3] = DataOp;
-  return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+
+  return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask,
+                            N->getMemoryVT(), N->getMemOperand(),
+                            TruncateStore);
 }

 SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){

View File

@@ -659,6 +659,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue WidenVecOp_STORE(SDNode* N);
+  SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
   SDValue WidenVecOp_SETCC(SDNode* N);

   SDValue WidenVecOp_Convert(SDNode *N);

View File

@@ -992,6 +992,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
   SDValue Ptr = MLD->getBasePtr();
   SDValue Mask = MLD->getMask();
   unsigned Alignment = MLD->getOriginalAlignment();
+  ISD::LoadExtType ExtType = MLD->getExtensionType();

   // if Alignment is equal to the vector size,
   // take the half of it for the second part
@@ -1015,7 +1016,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
                          MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
                          Alignment, MLD->getAAInfo(), MLD->getRanges());

-  Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, MMO);
+  Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
+                         ExtType);

   unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
   Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
@@ -1026,7 +1028,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
                          MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
                          SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());

-  Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, MMO);
+  Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
+                         ExtType);

   // Build a factor node to remember that this load is independent of the
@@ -1464,7 +1467,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
   SDValue Ch  = N->getChain();
   SDValue Ptr = N->getBasePtr();
   SDValue Mask = N->getMask();
-  SDValue Data = N->getData();
+  SDValue Data = N->getValue();
   EVT MemoryVT = N->getMemoryVT();
   unsigned Alignment = N->getOriginalAlignment();
   SDLoc DL(N);
@@ -1489,7 +1492,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
                          MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
                          Alignment, N->getAAInfo(), N->getRanges());

-  Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, MMO);
+  Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
+                          N->isTruncatingStore());

   unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
   Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
@@ -1500,7 +1504,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
                          MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
                          SecondHalfAlignment, N->getAAInfo(), N->getRanges());

-  Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, MMO);
+  Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
+                          N->isTruncatingStore());

   // Build a factor node to remember that this store is independent of the
@@ -2412,6 +2417,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
   SDValue Mask = N->getMask();
   EVT MaskVT = Mask.getValueType();
   SDValue Src0 = GetWidenedVector(N->getSrc0());
+  ISD::LoadExtType ExtType = N->getExtensionType();
   SDLoc dl(N);

   if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
@@ -2434,14 +2440,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
     Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
   }

-  // Rebuild memory operand because MemoryVT was changed
-  MachineMemOperand *MMO = DAG.getMachineFunction().
-    getMachineMemOperand(N->getPointerInfo(),
-                         MachineMemOperand::MOLoad, WidenVT.getStoreSize(),
-                         N->getAlignment(), N->getAAInfo(), N->getRanges());
-
   SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
-                                  Mask, Src0, MMO);
+                                  Mask, Src0, N->getMemoryVT(),
+                                  N->getMemOperand(), ExtType);
   // Legalized the chain result - switch anything that used the old chain to
   // use the new one.
   ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -2593,6 +2594,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
   case ISD::EXTRACT_SUBVECTOR:  Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
   case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
   case ISD::STORE:              Res = WidenVecOp_STORE(N); break;
+  case ISD::MSTORE:             Res = WidenVecOp_MSTORE(N, OpNo); break;
   case ISD::SETCC:              Res = WidenVecOp_SETCC(N); break;

   case ISD::ANY_EXTEND:
@@ -2791,6 +2793,42 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
   return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
 }

+SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
+  MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
+  SDValue Mask = MST->getMask();
+  EVT MaskVT = Mask.getValueType();
+  SDValue StVal = MST->getValue();
+  // Widen the value
+  SDValue WideVal = GetWidenedVector(StVal);
+  SDLoc dl(N);
+
+  if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
+    Mask = GetWidenedVector(Mask);
+  else {
+    // The mask should be widened as well
+    EVT BoolVT = getSetCCResultType(WideVal.getValueType());
+    // We can't use ModifyToType() because we should fill the mask with
+    // zeroes
+    unsigned WidenNumElts = BoolVT.getVectorNumElements();
+    unsigned MaskNumElts = MaskVT.getVectorNumElements();
+
+    unsigned NumConcat = WidenNumElts / MaskNumElts;
+    SmallVector<SDValue, 16> Ops(NumConcat);
+    SDValue ZeroVal = DAG.getConstant(0, MaskVT);
+    Ops[0] = Mask;
+    for (unsigned i = 1; i != NumConcat; ++i)
+      Ops[i] = ZeroVal;
+
+    Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
+  }
+  assert(Mask.getValueType().getVectorNumElements() ==
+         WideVal.getValueType().getVectorNumElements() &&
+         "Mask and data vectors should have the same number of elements");
+  return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
+                            Mask, MST->getMemoryVT(), MST->getMemOperand(),
+                            false);
+}
+
 SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
   SDValue InOp0 = GetWidenedVector(N->getOperand(0));
   SDValue InOp1 = GetWidenedVector(N->getOperand(1));
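WidenVecOp_MSTORE deliberately avoids ModifyToType() because the padding lanes of a store mask must be zero; undef padding could enable stores the scalar loop never performed. A self-contained sketch of that concat-with-zero widening (plain C++, bytes standing in for i1 lanes):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Widen an N-lane mask to WidenNumElts lanes by appending zero vectors,
    // as in WidenVecOp_MSTORE: extra lanes are 0 so no extra stores happen.
    std::vector<uint8_t> widenMask(const std::vector<uint8_t> &Mask,
                                   unsigned WidenNumElts) {
      unsigned MaskNumElts = Mask.size();
      unsigned NumConcat = WidenNumElts / MaskNumElts;
      std::vector<uint8_t> Wide;
      Wide.reserve(WidenNumElts);
      Wide.insert(Wide.end(), Mask.begin(), Mask.end());  // Ops[0] = Mask
      for (unsigned I = 1; I != NumConcat; ++I)           // Ops[i] = ZeroVal
        Wide.insert(Wide.end(), MaskNumElts, 0);
      return Wide;
    }

    int main() {
      for (uint8_t B : widenMask({1, 0, 1, 1}, 8))
        std::cout << (unsigned)B;
      std::cout << "\n";  // prints 10110000
    }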

View File

@@ -4924,15 +4924,15 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
 SDValue
 SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
-                            SDValue Ptr, SDValue Mask, SDValue Src0,
-                            MachineMemOperand *MMO) {
+                            SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT,
+                            MachineMemOperand *MMO, ISD::LoadExtType ExtTy) {

   SDVTList VTs = getVTList(VT, MVT::Other);
   SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
   ID.AddInteger(VT.getRawBits());
-  ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED,
+  ID.AddInteger(encodeMemSDNodeFlags(ExtTy, ISD::UNINDEXED,
                                      MMO->isVolatile(),
                                      MMO->isNonTemporal(),
                                      MMO->isInvariant()));
@@ -4944,14 +4944,15 @@ SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
   }
   SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(),
                                                    dl.getDebugLoc(), Ops, 4, VTs,
-                                                   VT, MMO);
+                                                   ExtTy, MemVT, MMO);
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
   return SDValue(N, 0);
 }

 SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
-                                     SDValue Ptr, SDValue Mask, MachineMemOperand *MMO) {
+                                     SDValue Ptr, SDValue Mask, EVT MemVT,
+                                     MachineMemOperand *MMO, bool isTrunc) {
   assert(Chain.getValueType() == MVT::Other &&
         "Invalid chain type");
   EVT VT = Val.getValueType();
@@ -4970,7 +4971,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
   }
   SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(),
                                                     dl.getDebugLoc(), Ops, 4,
-                                                    VTs, VT, MMO);
+                                                    VTs, isTrunc, MemVT, MMO);
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
   return SDValue(N, 0);

View File

@@ -3667,7 +3667,8 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
                           getMachineMemOperand(MachinePointerInfo(PtrOperand),
                           MachineMemOperand::MOStore, VT.getStoreSize(),
                           Alignment, AAInfo);
-  SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, MMO);
+  SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
+                                         MMO, false);
   DAG.setRoot(StoreNode);
   setValue(&I, StoreNode);
 }
@@ -3706,7 +3707,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
                          MachineMemOperand::MOLoad, VT.getStoreSize(),
                          Alignment, AAInfo, Ranges);

-  SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, MMO);
+  SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
+                                   ISD::NON_EXTLOAD);
   SDValue OutChain = Load.getValue(1);
   DAG.setRoot(OutChain);
   setValue(&I, Load);

View File

@@ -4,7 +4,6 @@ add_llvm_library(LLVMExecutionEngine
   ExecutionEngine.cpp
   ExecutionEngineBindings.cpp
   GDBRegistrationListener.cpp
-  RTDyldMemoryManager.cpp
   TargetSelect.cpp
   )

View File

@@ -22,4 +22,4 @@ subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT
 type = Library
 name = ExecutionEngine
 parent = Libraries
-required_libraries = Core MC Object Support
+required_libraries = Core MC Object Support RuntimeDyld

View File

@@ -1,4 +1,5 @@
 add_llvm_library(LLVMRuntimeDyld
+  RTDyldMemoryManager.cpp
   RuntimeDyld.cpp
   RuntimeDyldChecker.cpp
   RuntimeDyldELF.cpp

View File

@@ -257,11 +257,11 @@ Constant *Constant::getAggregateElement(unsigned Elt) const {
   if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
     return Elt < CV->getNumOperands() ? CV->getOperand(Elt) : nullptr;

-  if (const ConstantAggregateZero *CAZ =dyn_cast<ConstantAggregateZero>(this))
-    return CAZ->getElementValue(Elt);
+  if (const ConstantAggregateZero *CAZ = dyn_cast<ConstantAggregateZero>(this))
+    return Elt < CAZ->getNumElements() ? CAZ->getElementValue(Elt) : nullptr;

   if (const UndefValue *UV = dyn_cast<UndefValue>(this))
-    return UV->getElementValue(Elt);
+    return Elt < UV->getNumElements() ? UV->getElementValue(Elt) : nullptr;

   if (const ConstantDataSequential *CDS =dyn_cast<ConstantDataSequential>(this))
     return Elt < CDS->getNumElements() ? CDS->getElementAsConstant(Elt)
@@ -764,6 +764,14 @@ Constant *ConstantAggregateZero::getElementValue(unsigned Idx) const {
   return getStructElement(Idx);
 }

+unsigned ConstantAggregateZero::getNumElements() const {
+  const Type *Ty = getType();
+  if (const auto *AT = dyn_cast<ArrayType>(Ty))
+    return AT->getNumElements();
+  if (const auto *VT = dyn_cast<VectorType>(Ty))
+    return VT->getNumElements();
+  return Ty->getStructNumElements();
+}

 //===----------------------------------------------------------------------===//
 //                         UndefValue Implementation
@@ -797,7 +805,14 @@ UndefValue *UndefValue::getElementValue(unsigned Idx) const {
   return getStructElement(Idx);
 }

+unsigned UndefValue::getNumElements() const {
+  const Type *Ty = getType();
+  if (const auto *AT = dyn_cast<ArrayType>(Ty))
+    return AT->getNumElements();
+  if (const auto *VT = dyn_cast<VectorType>(Ty))
+    return VT->getNumElements();
+  return Ty->getStructNumElements();
+}

 //===----------------------------------------------------------------------===//
 //                         ConstantXXX Classes
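With the two getNumElements() helpers in place, getAggregateElement now bounds-checks zeroinitializer and undef aggregates and returns nullptr for an out-of-range index instead of crashing; the new instsimplify test later in this commit exercises exactly that on the empty struct type {}. A generic sketch of the guard pattern (illustrative types, not the LLVM API):

    #include <iostream>
    #include <vector>

    // Guarded element access: return nullptr instead of indexing out of
    // range, mirroring the Elt < getNumElements() checks added above.
    const int *getAggregateElement(const std::vector<int> &Agg, unsigned Elt) {
      return Elt < Agg.size() ? &Agg[Elt] : nullptr;
    }

    int main() {
      std::vector<int> ZeroInit;  // an empty aggregate, like '{}'
      const int *E = getAggregateElement(ZeroInit, 0);
      std::cout << (E ? "element" : "nullptr") << "\n";  // nullptr, no crash
    }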

View File

@@ -1679,7 +1679,9 @@ void X86TargetLowering::resetOperationActions() {
   setTargetDAGCombine(ISD::FMA);
   setTargetDAGCombine(ISD::SUB);
   setTargetDAGCombine(ISD::LOAD);
+  setTargetDAGCombine(ISD::MLOAD);
   setTargetDAGCombine(ISD::STORE);
+  setTargetDAGCombine(ISD::MSTORE);
   setTargetDAGCombine(ISD::ZERO_EXTEND);
   setTargetDAGCombine(ISD::ANY_EXTEND);
   setTargetDAGCombine(ISD::SIGN_EXTEND);
@@ -24738,6 +24740,166 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }

+/// PerformMLOADCombine - Resolve extending loads
+static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   const X86Subtarget *Subtarget) {
+  MaskedLoadSDNode *Mld = cast<MaskedLoadSDNode>(N);
+  if (Mld->getExtensionType() != ISD::SEXTLOAD)
+    return SDValue();
+
+  EVT VT = Mld->getValueType(0);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  unsigned NumElems = VT.getVectorNumElements();
+  EVT LdVT = Mld->getMemoryVT();
+  SDLoc dl(Mld);
+
+  assert(LdVT != VT && "Cannot extend to the same type");
+  unsigned ToSz = VT.getVectorElementType().getSizeInBits();
+  unsigned FromSz = LdVT.getVectorElementType().getSizeInBits();
+  // From, To sizes and ElemCount must be pow of two
+  assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
+          "Unexpected size for extending masked load");
+
+  unsigned SizeRatio = ToSz / FromSz;
+  assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits());
+
+  // Create a type on which we perform the shuffle
+  EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
+                                   LdVT.getScalarType(), NumElems*SizeRatio);
+  assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
+
+  // Convert Src0 value
+  SDValue WideSrc0 = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mld->getSrc0());
+  if (Mld->getSrc0().getOpcode() != ISD::UNDEF) {
+    SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
+    for (unsigned i = 0; i != NumElems; ++i)
+      ShuffleVec[i] = i * SizeRatio;
+
+    // Can't shuffle using an illegal type.
+    assert (TLI.isTypeLegal(WideVecVT) && "WideVecVT should be legal");
+    WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0,
+                                    DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
+  }
+  // Prepare the new mask
+  SDValue NewMask;
+  SDValue Mask = Mld->getMask();
+  if (Mask.getValueType() == VT) {
+    // Mask and original value have the same type
+    NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
+    SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
+    for (unsigned i = 0; i != NumElems; ++i)
+      ShuffleVec[i] = i * SizeRatio;
+    for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
+      ShuffleVec[i] = NumElems*SizeRatio;
+    NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
+                                   DAG.getConstant(0, WideVecVT),
+                                   &ShuffleVec[0]);
+  }
+  else {
+    assert(Mask.getValueType().getVectorElementType() == MVT::i1);
+    unsigned WidenNumElts = NumElems*SizeRatio;
+    unsigned MaskNumElts = VT.getVectorNumElements();
+    EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+                                     WidenNumElts);
+
+    unsigned NumConcat = WidenNumElts / MaskNumElts;
+    SmallVector<SDValue, 16> Ops(NumConcat);
+    SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType());
+    Ops[0] = Mask;
+    for (unsigned i = 1; i != NumConcat; ++i)
+      Ops[i] = ZeroVal;
+
+    NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
+  }
+
+  SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(),
+                                     Mld->getBasePtr(), NewMask, WideSrc0,
+                                     Mld->getMemoryVT(), Mld->getMemOperand(),
+                                     ISD::NON_EXTLOAD);
+  SDValue NewVec = DAG.getNode(X86ISD::VSEXT, dl, VT, WideLd);
+  return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
+}
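The rewrite is sound because sign extension commutes with the lane-wise select that a masked load performs: extending each loaded or pass-through lane afterwards (the VSEXT) yields the same lanes as an extending load would. A self-contained scalar check of that identity (illustrative values, not LLVM code):

    #include <cstdint>
    #include <iostream>

    // Core fact used by the combine:
    //   select(m, sext(x), sext(s)) == sext(select(m, x, s))
    // The masked load computes the select; the VSEXT is hoisted out of it.
    int main() {
      const unsigned N = 4;
      int16_t Mem[N]  = {-1, 42, -300, 7};  // narrow values in memory
      int16_t Src0[N] = {9, -9, 99, -99};   // narrow pass-through lanes
      bool    Mask[N] = {true, false, true, false};

      for (unsigned I = 0; I != N; ++I) {
        int32_t ExtendThenSelect = Mask[I] ? (int32_t)Mem[I] : (int32_t)Src0[I];
        int32_t SelectThenExtend = (int32_t)(Mask[I] ? Mem[I] : Src0[I]);
        if (ExtendThenSelect != SelectThenExtend)
          return 1;                         // never happens
      }
      std::cout << "sext(select) == select(sext)\n";
    }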
+/// PerformMSTORECombine - Resolve truncating stores
+static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
+                                    const X86Subtarget *Subtarget) {
+  MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
+  if (!Mst->isTruncatingStore())
+    return SDValue();
+
+  EVT VT = Mst->getValue().getValueType();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  unsigned NumElems = VT.getVectorNumElements();
+  EVT StVT = Mst->getMemoryVT();
+  SDLoc dl(Mst);
+
+  assert(StVT != VT && "Cannot truncate to the same type");
+  unsigned FromSz = VT.getVectorElementType().getSizeInBits();
+  unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
+
+  // From, To sizes and ElemCount must be pow of two
+  assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
+          "Unexpected size for truncating masked store");
+  // We are going to use the original vector elt for storing.
+  // Accumulated smaller vector elements must be a multiple of the store size.
+  assert (((NumElems * FromSz) % ToSz) == 0 &&
+          "Unexpected ratio for truncating masked store");
+
+  unsigned SizeRatio = FromSz / ToSz;
+  assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
+
+  // Create a type on which we perform the shuffle
+  EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
+                                   StVT.getScalarType(), NumElems*SizeRatio);
+  assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
+
+  SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mst->getValue());
+  SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
+  for (unsigned i = 0; i != NumElems; ++i)
+    ShuffleVec[i] = i * SizeRatio;
+
+  // Can't shuffle using an illegal type.
+  assert (TLI.isTypeLegal(WideVecVT) && "WideVecVT should be legal");
+
+  SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
+                                              DAG.getUNDEF(WideVecVT),
+                                              &ShuffleVec[0]);
+
+  SDValue NewMask;
+  SDValue Mask = Mst->getMask();
+  if (Mask.getValueType() == VT) {
+    // Mask and original value have the same type
+    NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
+    for (unsigned i = 0; i != NumElems; ++i)
+      ShuffleVec[i] = i * SizeRatio;
+    for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
+      ShuffleVec[i] = NumElems*SizeRatio;
+    NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
+                                   DAG.getConstant(0, WideVecVT),
+                                   &ShuffleVec[0]);
+  }
+  else {
+    assert(Mask.getValueType().getVectorElementType() == MVT::i1);
+    unsigned WidenNumElts = NumElems*SizeRatio;
+    unsigned MaskNumElts = VT.getVectorNumElements();
+    EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+                                     WidenNumElts);
+
+    unsigned NumConcat = WidenNumElts / MaskNumElts;
+    SmallVector<SDValue, 16> Ops(NumConcat);
+    SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType());
+    Ops[0] = Mask;
+    for (unsigned i = 1; i != NumConcat; ++i)
+      Ops[i] = ZeroVal;
+
+    NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
+  }
+
+  return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal, Mst->getBasePtr(),
+                            NewMask, StVT, Mst->getMemOperand(), false);
+}
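The truncation itself is done with a bitcast to narrow lanes plus a shuffle that gathers lane i * SizeRatio for each element; on a little-endian target such as x86 that lane is exactly the low, truncated half of wide lane i. A scalar model of that index arithmetic (self-contained; little-endian assumed):

    #include <cstdint>
    #include <cstring>
    #include <iostream>

    int main() {
      const unsigned NumElems = 4, SizeRatio = 2;   // i32 -> i16 (FromSz/ToSz)
      int32_t Wide[NumElems] = {0x11112222, -1, 0x7fff, 0x00038000};

      // Bitcast <4 x i32> to <8 x i16>: each i32 lane becomes two i16 lanes.
      int16_t Lanes[NumElems * SizeRatio];
      std::memcpy(Lanes, Wide, sizeof(Wide));

      // Shuffle gathering lane i*SizeRatio: on little-endian this is the low
      // (truncated) half of wide lane i, i.e. ShuffleVec[i] = i * SizeRatio.
      int16_t Truncated[NumElems];
      for (unsigned I = 0; I != NumElems; ++I)
        Truncated[I] = Lanes[I * SizeRatio];

      for (int16_t V : Truncated)
        std::cout << std::hex << (uint16_t)V << ' ';  // 2222 ffff 7fff 8000
      std::cout << "\n";
    }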
 /// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
 static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
                                    const X86Subtarget *Subtarget) {
@@ -25836,7 +25998,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::OR:             return PerformOrCombine(N, DAG, DCI, Subtarget);
   case ISD::XOR:            return PerformXorCombine(N, DAG, DCI, Subtarget);
   case ISD::LOAD:           return PerformLOADCombine(N, DAG, DCI, Subtarget);
+  case ISD::MLOAD:          return PerformMLOADCombine(N, DAG, DCI, Subtarget);
   case ISD::STORE:          return PerformSTORECombine(N, DAG, Subtarget);
+  case ISD::MSTORE:         return PerformMSTORECombine(N, DAG, Subtarget);
   case ISD::SINT_TO_FP:     return PerformSINT_TO_FPCombine(N, DAG, this);
   case ISD::FADD:           return PerformFADDCombine(N, DAG, Subtarget);
   case ISD::FSUB:           return PerformFSUBCombine(N, DAG, Subtarget);

View File

@@ -403,7 +403,7 @@ bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction& Start,
                                                       const Instruction& End,
                                                       AliasAnalysis::Location
                                                       Loc) {
-  return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::Ref);
+  return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::ModRef);
 }

 ///
@@ -414,6 +414,7 @@ bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction& Start,
 StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
                                                    StoreInst *Store0) {
   DEBUG(dbgs() << "can Sink? : "; Store0->dump(); dbgs() << "\n");
+  BasicBlock *BB0 = Store0->getParent();
   for (BasicBlock::reverse_iterator RBI = BB1->rbegin(), RBE = BB1->rend();
        RBI != RBE; ++RBI) {
     Instruction *Inst = &*RBI;
@@ -422,13 +423,14 @@ StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
       continue;

     StoreInst *Store1 = cast<StoreInst>(Inst);
-    BasicBlock *BB0 = Store0->getParent();

     AliasAnalysis::Location Loc0 = AA->getLocation(Store0);
     AliasAnalysis::Location Loc1 = AA->getLocation(Store1);
     if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
-        !isStoreSinkBarrierInRange(*Store1, BB1->back(), Loc1) &&
-        !isStoreSinkBarrierInRange(*Store0, BB0->back(), Loc0)) {
+        !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store1))),
+                                   BB1->back(), Loc1) &&
+        !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store0))),
+                                   BB0->back(), Loc0)) {
       return Store1;
     }
   }

View File

@@ -55,7 +55,7 @@ STATISTIC(NumRuntimeUnrolled,
 ///     - Branch around the original loop if the trip count is less
 ///       than the unroll factor.
 ///
-static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
+static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
                           BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
                           BasicBlock *OrigPH, BasicBlock *NewPH,
                           ValueToValueMapTy &VMap, Pass *P) {
@@ -105,12 +105,19 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
     }
   }

-  // Create a branch around the orignal loop, which is taken if the
-  // trip count is less than the unroll factor.
+  // Create a branch around the orignal loop, which is taken if there are no
+  // iterations remaining to be executed after running the prologue.
   Instruction *InsertPt = PrologEnd->getTerminator();
+
+  assert(Count != 0 && "nonsensical Count!");
+
+  // If BECount <u (Count - 1) then (BECount + 1) & (Count - 1) == (BECount + 1)
+  // (since Count is a power of 2).  This means %xtraiter is (BECount + 1) and
+  // all of the iterations of this loop were executed by the prologue.  Note
+  // that if BECount <u (Count - 1) then (BECount + 1) cannot unsigned-overflow.
   Instruction *BrLoopExit =
-      new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
-                   ConstantInt::get(TripCount->getType(), Count));
+      new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, BECount,
+                   ConstantInt::get(BECount->getType(), Count - 1));
   BasicBlock *Exit = L->getUniqueExitBlock();
   assert(Exit && "Loop must have a single exit block only");
   // Split the exit to maintain loop canonicalization guarantees
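The identity asserted in the new comment is easy to check exhaustively for one unroll factor: when Count is a power of two and BECount <u Count - 1, masking BECount + 1 with Count - 1 changes nothing, so the prologue alone has already run every iteration. A standalone check (hypothetical driver, not LLVM code):

    #include <cstdint>
    #include <iostream>

    int main() {
      const uint32_t Count = 8;  // unroll factor, a power of two
      for (uint64_t BECount = 0; BECount != 64; ++BECount) {
        uint64_t XtraIter = (BECount + 1) & (Count - 1);  // %xtraiter
        bool SkipUnrolledLoop = BECount < Count - 1;      // the new branch condition
        // If we branch around the unrolled loop, the prologue must have run
        // all BECount + 1 iterations.
        if (SkipUnrolledLoop && XtraIter != BECount + 1)
          return 1;                                       // identity violated (never)
      }
      std::cout << "BECount <u Count-1  ==>  xtraiter == BECount+1\n";
    }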
@@ -292,23 +299,28 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
   // Only unroll loops with a computable trip count and the trip count needs
   // to be an int value (allowing a pointer type is a TODO item)
-  const SCEV *BECount = SE->getBackedgeTakenCount(L);
-  if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
+  const SCEV *BECountSC = SE->getBackedgeTakenCount(L);
+  if (isa<SCEVCouldNotCompute>(BECountSC) ||
+      !BECountSC->getType()->isIntegerTy())
     return false;

-  // If BECount is INT_MAX, we can't compute trip-count without overflow.
-  if (BECount->isAllOnesValue())
-    return false;
+  unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();

   // Add 1 since the backedge count doesn't include the first loop iteration
   const SCEV *TripCountSC =
-      SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
+      SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
   if (isa<SCEVCouldNotCompute>(TripCountSC))
     return false;

   // We only handle cases when the unroll factor is a power of 2.
   // Count is the loop unroll factor, the number of extra copies added + 1.
-  if ((Count & (Count-1)) != 0)
+  if (!isPowerOf2_32(Count))
+    return false;
+
+  // This constraint lets us deal with an overflowing trip count easily; see the
+  // comment on ModVal below.  This check is equivalent to `Log2(Count) <
+  // BEWidth`.
+  if (static_cast<uint64_t>(Count) > (1ULL << BEWidth))
     return false;

   // If this loop is nested, then the loop unroller changes the code in
@@ -330,16 +342,23 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
   SCEVExpander Expander(*SE, "loop-unroll");
   Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
                                             PreHeaderBR);
+  Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
+                                          PreHeaderBR);

   IRBuilder<> B(PreHeaderBR);
   Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");

-  // Check if for no extra iterations, then jump to cloned/unrolled loop.
-  // We have to check that the trip count computation didn't overflow when
-  // adding one to the backedge taken count.
-  Value *LCmp = B.CreateIsNotNull(ModVal, "lcmp.mod");
-  Value *OverflowCheck = B.CreateIsNull(TripCount, "lcmp.overflow");
-  Value *BranchVal = B.CreateOr(OverflowCheck, LCmp, "lcmp.or");
+  // If ModVal is zero, we know that either
+  //  1. there are no iteration to be run in the prologue loop
+  // OR
+  //  2. the addition computing TripCount overflowed
+  //
+  // If (2) is true, we know that TripCount really is (1 << BEWidth) and so the
+  // number of iterations that remain to be run in the original loop is a
+  // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
+  // explicitly check this above).
+
+  Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");

   // Branch to either the extra iterations or the cloned/unrolled loop
   // We will fix up the true branch label when adding loop body copies
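The overflow case that the new comment describes can be demonstrated with a narrow backedge-count width: when BECount is all-ones, TripCount wraps to 0, so %xtraiter is 0 and the prologue is skipped, and the 2^BEWidth remaining iterations are a multiple of Count precisely because of the Count <= (1 << BEWidth) check added above. A sketch with BEWidth == 8 (illustrative, not LLVM code):

    #include <cstdint>
    #include <iostream>

    int main() {
      const unsigned Count = 4;         // power of two, Count <= 1 << BEWidth
      uint8_t BECount = 0xff;           // BEWidth == 8, backedge count is -1
      uint8_t TripCount = BECount + 1;  // wraps to 0: really 1 << 8 iterations
      uint8_t ModVal = TripCount & (Count - 1);  // %xtraiter

      std::cout << (unsigned)ModVal << "\n";  // 0: prologue skipped...
      std::cout << (256 % Count) << "\n";     // ...and 2^BEWidth % Count == 0,
                                              // so the unrolled body exits correctly.
    }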
@@ -362,10 +381,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
   std::vector<BasicBlock *> NewBlocks;
   ValueToValueMapTy VMap;

-  // If unroll count is 2 and we can't overflow in tripcount computation (which
-  // is BECount + 1), then we don't need a loop for prologue, and we can unroll
-  // it. We can be sure that we don't overflow only if tripcount is a constant.
-  bool UnrollPrologue = (Count == 2 && isa<ConstantInt>(TripCount));
+  bool UnrollPrologue = Count == 2;

   // Clone all the basic blocks in the loop.  If Count is 2, we don't clone
   // the loop, otherwise we create a cloned loop to execute the extra
@@ -391,7 +407,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
   // Connect the prolog code to the original loop and update the
   // PHI functions.
   BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
-  ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, VMap,
+  ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap,
                 LPM->getAsPass());
   NumRuntimeUnrolled++;
   return true;

View File

@@ -1874,6 +1874,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
         // wide store needs to start at the last vector element.
         PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
         PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+        Mask[Part] = reverseVector(Mask[Part]);
       }

       Value *VecPtr = Builder.CreateBitCast(PartPtr,
@@ -1902,6 +1903,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
         // wide load needs to start at the last vector element.
         PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
         PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+        Mask[Part] = reverseVector(Mask[Part]);
       }

       Instruction* NewLI;
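The one-line fix matters because lane j of a reversed wide memory access corresponds to scalar iteration Start - j, so a mask computed in iteration order must be reversed before it guards that access, just like the data. A scalar model of the lane correspondence (self-contained, illustrative predicate):

    #include <iostream>

    int main() {
      const int VF = 4, Start = 7;            // this part covers iterations 7,6,5,4
      bool IterMask[VF];                      // mask in iteration order
      for (int J = 0; J != VF; ++J)
        IterMask[J] = ((Start - J) % 2) == 0; // predicate of iteration Start - J

      // The wide memory op touches lanes in ascending address order: 4,5,6,7,
      // so lane J must use the reversed mask element (reverseVector(Mask)).
      for (int J = 0; J != VF; ++J) {
        int Lane = VF - 1 - J;
        std::cout << IterMask[Lane];          // guards iteration 4 + J
      }
      std::cout << "\n";                      // prints 1010: even iterations on
    }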

View File

@@ -159,7 +159,7 @@ define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
 }

 ; AVX2-LABEL: test15
-; AVX2: vpmaskmovq
+; AVX2: vpmaskmovd
 define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
   %mask = icmp eq <2 x i32> %trigger, zeroinitializer
   call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
@@ -176,8 +176,9 @@ define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %
 }

 ; AVX2-LABEL: test17
-; AVX2: vpmaskmovq
-; AVX2: vblendvpd
+; AVX2: vpmaskmovd
+; AVX2: vblendvps
+; AVX2: vpmovsxdq
 define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
   %mask = icmp eq <2 x i32> %trigger, zeroinitializer
   %res = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)

View File

@@ -0,0 +1,114 @@
+; ModuleID = 'bug.c'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; RUN: opt -O2 -S < %s | FileCheck %s
+
+; CHECK_LABEL: main
+; CHECK: if.end
+; CHECK: store
+; CHECK: memset
+; CHECK: if.then
+; CHECK: store
+; CHECK: memset
+
+@d = common global i32 0, align 4
+@b = common global i32 0, align 4
+@f = common global [1 x [3 x i8]] zeroinitializer, align 1
+@e = common global i32 0, align 4
+@c = common global i32 0, align 4
+@a = common global i32 0, align 4
+
+; Function Attrs: nounwind uwtable
+define void @fn1() #0 {
+entry:
+  store i32 0, i32* @d, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc8, %entry
+  %0 = load i32* @d, align 4
+  %cmp = icmp slt i32 %0, 2
+  br i1 %cmp, label %for.body, label %for.end10
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* @d, align 4
+  %idxprom = sext i32 %1 to i64
+  %2 = load i32* @b, align 4
+  %idxprom1 = sext i32 %2 to i64
+  %arrayidx = getelementptr inbounds [1 x [3 x i8]]* @f, i32 0, i64 %idxprom1
+  %arrayidx2 = getelementptr inbounds [3 x i8]* %arrayidx, i32 0, i64 %idxprom
+  store i8 0, i8* %arrayidx2, align 1
+  store i32 0, i32* @e, align 4
+  br label %for.cond3
+
+for.cond3:                                        ; preds = %for.inc, %for.body
+  %3 = load i32* @e, align 4
+  %cmp4 = icmp slt i32 %3, 3
+  br i1 %cmp4, label %for.body5, label %for.end
+
+for.body5:                                        ; preds = %for.cond3
+  %4 = load i32* @c, align 4
+  %tobool = icmp ne i32 %4, 0
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body5
+  %5 = load i32* @a, align 4
+  %dec = add nsw i32 %5, -1
+  store i32 %dec, i32* @a, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body5
+  %6 = load i32* @e, align 4
+  %idxprom6 = sext i32 %6 to i64
+  %arrayidx7 = getelementptr inbounds [3 x i8]* getelementptr inbounds ([1 x [3 x i8]]* @f, i32 0, i64 0), i32 0, i64 %idxprom6
+  store i8 1, i8* %arrayidx7, align 1
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %7 = load i32* @e, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* @e, align 4
+  br label %for.cond3
+
+for.end:                                          ; preds = %for.cond3
+  br label %for.inc8
+
+for.inc8:                                         ; preds = %for.end
+  %8 = load i32* @d, align 4
+  %inc9 = add nsw i32 %8, 1
+  store i32 %inc9, i32* @d, align 4
+  br label %for.cond
+
+for.end10:                                        ; preds = %for.cond
+  ret void
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  call void @fn1()
+  %0 = load i8* getelementptr inbounds ([1 x [3 x i8]]* @f, i32 0, i64 0, i64 1), align 1
+  %conv = sext i8 %0 to i32
+  %cmp = icmp ne i32 %conv, 1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  call void @abort() #2
+  unreachable
+
+if.end:                                           ; preds = %entry
+  ret i32 0
+}
+
+; Function Attrs: noreturn nounwind
+declare void @abort() #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { noreturn nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { noreturn nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 229288) (llvm/trunk 229286:229290M)"}

View File

@@ -0,0 +1,19 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+@zeroinit = constant {} zeroinitializer
+@undef = constant {} undef
+
+define i32 @crash_on_zeroinit() {
+; CHECK-LABEL: @crash_on_zeroinit
+; CHECK: ret i32 0
+  %load = load i32* bitcast ({}* @zeroinit to i32*)
+  ret i32 %load
+}
+
+define i32 @crash_on_undef() {
+; CHECK-LABEL: @crash_on_undef
+; CHECK: ret i32 undef
+  %load = load i32* bitcast ({}* @undef to i32*)
+  ret i32 %load
+}

View File

@@ -4,9 +4,7 @@
 ; CHECK: %xtraiter = and i32 %n
 ; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0
-; CHECK: %lcmp.overflow = icmp eq i32 %n, 0
-; CHECK: %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod
-; CHECK: br i1 %lcmp.or, label %for.body.prol, label %for.body.preheader.split
+; CHECK: br i1 %lcmp.mod, label %for.body.prol, label %for.body.preheader.split

 ; CHECK: for.body.prol:
 ; CHECK: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.preheader ]

View File

@@ -3,7 +3,7 @@
 ; This tests that setting the unroll count works

 ; CHECK: for.body.prol:
-; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split
+; CHECK: br label %for.body.preheader.split
 ; CHECK: for.body:
 ; CHECK: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body
 ; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body

View File

@@ -1,19 +1,28 @@
 ; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

-; When prologue is fully unrolled, the branch on its end is unconditional.
-; Unrolling it is illegal if we can't prove that trip-count+1 doesn't overflow,
-; like in this example, where it comes from an argument.
-;
-; This test is based on an example from here:
-; http://stackoverflow.com/questions/23838661/why-is-clang-optimizing-this-code-out
-;
+; This test case documents how runtime loop unrolling handles the case
+; when the backedge-count is -1.
+;
+; If %N, the backedge-taken count, is -1 then %0 unsigned-overflows
+; and is 0.  %xtraiter too is 0, signifying that the total trip-count
+; is divisible by 2.  The prologue then branches to the unrolled loop
+; and executes the 2^32 iterations there, in groups of 2.
+
+; CHECK: entry:
+; CHECK-NEXT: %0 = add i32 %N, 1
+; CHECK-NEXT: %xtraiter = and i32 %0, 1
+; CHECK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; CHECK-NEXT: br i1 %lcmp.mod, label %while.body.prol, label %entry.split
+
 ; CHECK: while.body.prol:
-; CHECK: br i1
+; CHECK: br label %entry.split
 ; CHECK: entry.split:

 ; Function Attrs: nounwind readnone ssp uwtable
-define i32 @foo(i32 %N) #0 {
+define i32 @foo(i32 %N) {
 entry:
   br label %while.body
@@ -26,5 +35,3 @@ while.body:                                       ; preds = %while.body, %entry
 while.end:                                        ; preds = %while.body
   ret i32 %i
 }
-
-attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

View File

@@ -418,3 +418,85 @@ for.end:                                          ; preds = %for.cond
   ret void
 }

+; Reverse loop
+;void foo6(double *in, double *out, unsigned size, int *trigger) {
+;
+;  for (int i=SIZE-1; i>=0; i--) {
+;    if (trigger[i] > 0) {
+;      out[i] = in[i] + (double) 0.5;
+;    }
+;  }
+;}
+;AVX2-LABEL: @foo6
+;AVX2: icmp sgt <4 x i32> %reverse, zeroinitializer
+;AVX2: shufflevector <4 x i1>{{.*}}<4 x i32> <i32 3, i32 2, i32 1, i32 0>
+;AVX2: call <4 x double> @llvm.masked.load.v4f64
+;AVX2: fadd <4 x double>
+;AVX2: call void @llvm.masked.store.v4f64
+;AVX2: ret void
+
+;AVX512-LABEL: @foo6
+;AVX512: icmp sgt <8 x i32> %reverse, zeroinitializer
+;AVX512: shufflevector <8 x i1>{{.*}}<8 x i32> <i32 7, i32 6, i32 5, i32 4
+;AVX512: call <8 x double> @llvm.masked.load.v8f64
+;AVX512: fadd <8 x double>
+;AVX512: call void @llvm.masked.store.v8f64
+;AVX512: ret void
+
+define void @foo6(double* %in, double* %out, i32 %size, i32* %trigger) {
+entry:
+  %in.addr = alloca double*, align 8
+  %out.addr = alloca double*, align 8
+  %size.addr = alloca i32, align 4
+  %trigger.addr = alloca i32*, align 8
+  %i = alloca i32, align 4
+  store double* %in, double** %in.addr, align 8
+  store double* %out, double** %out.addr, align 8
+  store i32 %size, i32* %size.addr, align 4
+  store i32* %trigger, i32** %trigger.addr, align 8
+  store i32 4095, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp sge i32 %0, 0
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %idxprom = sext i32 %1 to i64
+  %2 = load i32** %trigger.addr, align 8
+  %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
+  %3 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %3, 0
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %4 = load i32* %i, align 4
+  %idxprom2 = sext i32 %4 to i64
+  %5 = load double** %in.addr, align 8
+  %arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2
+  %6 = load double* %arrayidx3, align 8
+  %add = fadd double %6, 5.000000e-01
+  %7 = load i32* %i, align 4
+  %idxprom4 = sext i32 %7 to i64
+  %8 = load double** %out.addr, align 8
+  %arrayidx5 = getelementptr inbounds double* %8, i64 %idxprom4
+  store double %add, double* %arrayidx5, align 8
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %9 = load i32* %i, align 4
+  %dec = add nsw i32 %9, -1
+  store i32 %dec, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}

View File

@@ -10,6 +10,7 @@ set(LLVM_LINK_COMPONENTS
   MC
   MCJIT
   Object
+  RuntimeDyld
   SelectionDAG
   Support
   native

View File

@@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS
   ExecutionEngine
   Interpreter
   MC
+  RuntimeDyld
   Support
   )

View File

@@ -5,6 +5,7 @@ set(LLVM_LINK_COMPONENTS
   IPO
   MC
   MCJIT
+  RuntimeDyld
   ScalarOpts
   Support
   Target