Vendor import of llvm RELEASE_360/rc4 tag r229772 (effectively, 3.6.0 RC4):

https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_360/rc4@229772
dim 2015-02-19 20:55:17 +00:00
parent 85d2764eab
commit 49b6407b6c
36 changed files with 643 additions and 123 deletions

View File

@@ -5,11 +5,6 @@ LLVM 3.6 Release Notes
.. contents::
:local:
.. warning::
These are in-progress notes for the upcoming LLVM 3.6 release. You may
prefer the `LLVM 3.5 Release Notes <http://llvm.org/releases/3.5.0/docs
/ReleaseNotes.html>`_.
Introduction
============
@@ -26,10 +21,6 @@ have questions or comments, the `LLVM Developer's Mailing List
<http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ is a good place to send
them.
Note that if you are reading this file from a Subversion checkout or the main
LLVM web page, this document applies to the *next* release, not the current
one. To see the release notes for a specific release, please see the `releases
page <http://llvm.org/releases/>`_.
Non-comprehensive list of changes in this release
=================================================
@@ -544,6 +535,33 @@ new LLVM-based code generators "on the fly" for the designed processors and
loads them into the compiler backend as runtime libraries to avoid
per-target recompilation of larger parts of the compiler chain.
Likely
------
`Likely <http://www.liblikely.org>`_ is an embeddable just-in-time Lisp for
image recognition and heterogeneous computing. Algorithms are just-in-time
compiled using LLVM's MCJIT infrastructure to execute on single or
multi-threaded CPUs and potentially OpenCL SPIR or CUDA enabled GPUs.
Likely seeks to explore new optimizations for statistical learning
algorithms by moving them from an offline model generation step to the
compile-time evaluation of a function (the learning algorithm) with constant
arguments (the training data).
LDC - the LLVM-based D compiler
-------------------------------
`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It
pragmatically combines efficiency, control, and modeling power, with safety and
programmer productivity. D supports powerful concepts like Compile-Time Function
Execution (CTFE) and Template Meta-Programming, provides an innovative approach
to concurrency and offers many classical paradigms.
`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler
combined with LLVM as backend to produce efficient native code. LDC targets
x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on
PowerPC (32/64 bit). Ports to other architectures like ARM, AArch64 and MIPS64
are underway.
Additional Information
======================

View File

@@ -1,11 +1,6 @@
Overview
========
.. warning::
If you are using a released version of LLVM, see `the download page
<http://llvm.org/releases/>`_ to find your documentation.
The LLVM compiler infrastructure supports a wide range of projects, from
industrial strength compilers to specialized JIT applications to small
research projects.

View File

@@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS
ExecutionEngine
InstCombine
MC
RuntimeDyld
ScalarOpts
Support
native

View File

@@ -867,9 +867,11 @@ class SelectionDAG {
SDValue Offset, ISD::MemIndexedMode AM);
SDValue getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr,
SDValue Mask, SDValue Src0, MachineMemOperand *MMO);
SDValue Mask, SDValue Src0, EVT MemVT,
MachineMemOperand *MMO, ISD::LoadExtType);
SDValue getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
SDValue Ptr, SDValue Mask, MachineMemOperand *MMO);
SDValue Ptr, SDValue Mask, EVT MemVT,
MachineMemOperand *MMO, bool IsTrunc);
/// getSrcValue - Construct a node to track a Value* through the backend.
SDValue getSrcValue(const Value *v);

View File

@@ -1970,13 +1970,17 @@ class MaskedLoadStoreSDNode : public MemSDNode {
class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
public:
friend class SelectionDAG;
MaskedLoadSDNode(unsigned Order, DebugLoc dl,
SDValue *Operands, unsigned numOperands,
SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
MaskedLoadSDNode(unsigned Order, DebugLoc dl, SDValue *Operands,
unsigned numOperands, SDVTList VTs, ISD::LoadExtType ETy,
EVT MemVT, MachineMemOperand *MMO)
: MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, Operands, numOperands,
VTs, MemVT, MMO)
{}
VTs, MemVT, MMO) {
SubclassData |= (unsigned short)ETy;
}
ISD::LoadExtType getExtensionType() const {
return ISD::LoadExtType(SubclassData & 3);
}
const SDValue &getSrc0() const { return getOperand(3); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MLOAD;
@@ -1989,14 +1993,19 @@ class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
public:
friend class SelectionDAG;
MaskedStoreSDNode(unsigned Order, DebugLoc dl,
SDValue *Operands, unsigned numOperands,
SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
MaskedStoreSDNode(unsigned Order, DebugLoc dl, SDValue *Operands,
unsigned numOperands, SDVTList VTs, bool isTrunc, EVT MemVT,
MachineMemOperand *MMO)
: MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, Operands, numOperands,
VTs, MemVT, MMO)
{}
VTs, MemVT, MMO) {
SubclassData |= (unsigned short)isTrunc;
}
/// isTruncatingStore - Return true if the op does a truncation before store.
/// For integers this is the same as doing a TRUNCATE and storing the result.
/// For floats, it is the same as doing an FP_ROUND and storing the result.
bool isTruncatingStore() const { return SubclassData & 1; }
const SDValue &getData() const { return getOperand(3); }
const SDValue &getValue() const { return getOperand(3); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MSTORE;

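A scalar model may help picture the isTruncatingStore semantics documented above. The following standalone sketch uses hypothetical i32 -> i16 lane types chosen only for illustration; it is not LLVM API, just the per-lane behavior the doc comment describes:

#include <cassert>
#include <cstdint>

// Truncating masked store, modeled per lane: each enabled lane is
// truncated to the memory element type and stored; disabled lanes
// leave memory untouched.
int main() {
  uint32_t Val[4]  = {0x00010002, 0xDEADBEEF, 0x7FFF0003, 0x12345678};
  bool     Mask[4] = {true, false, true, false};
  uint16_t Mem[4]  = {0, 0, 0, 0};
  for (int i = 0; i != 4; ++i)
    if (Mask[i])
      Mem[i] = (uint16_t)Val[i];   // TRUNCATE i32 -> i16, then store
  assert(Mem[0] == 0x0002 && Mem[1] == 0 && Mem[2] == 0x0003 && Mem[3] == 0);
  return 0;
}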
View File

@@ -6,9 +6,6 @@
/* Exported configuration */
#include "llvm/Config/llvm-config.h"
/* Patch version of the LLVM API */
#cmakedefine LLVM_VERSION_PATCH ${LLVM_VERSION_PATCH}
/* Bug report URL. */
#define BUG_REPORT_URL "${BUG_REPORT_URL}"

View File

@@ -87,10 +87,13 @@
#cmakedefine LLVM_USE_OPROFILE 1
/* Major version of the LLVM API */
#cmakedefine LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR}
#define LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR}
/* Minor version of the LLVM API */
#cmakedefine LLVM_VERSION_MINOR ${LLVM_VERSION_MINOR}
#define LLVM_VERSION_MINOR ${LLVM_VERSION_MINOR}
/* Patch version of the LLVM API */
#define LLVM_VERSION_PATCH ${LLVM_VERSION_PATCH}
/* LLVM version string */
#define LLVM_VERSION_STRING "${PACKAGE_VERSION}"

View File

@@ -92,6 +92,9 @@
/* Minor version of the LLVM API */
#undef LLVM_VERSION_MINOR
/* Patch version of the LLVM API */
#undef LLVM_VERSION_PATCH
/* LLVM version string */
#undef LLVM_VERSION_STRING

View File

@@ -325,6 +325,9 @@ class ConstantAggregateZero : public Constant {
/// index.
Constant *getElementValue(unsigned Idx) const;
/// \brief Return the number of elements in the array, vector, or struct.
unsigned getNumElements() const;
/// Methods for support type inquiry through isa, cast, and dyn_cast:
///
static bool classof(const Value *V) {
@@ -1196,6 +1199,9 @@ class UndefValue : public Constant {
/// index.
UndefValue *getElementValue(unsigned Idx) const;
/// \brief Return the number of elements in the array, vector, or struct.
unsigned getNumElements() const;
void destroyConstant() override;
/// Methods for support type inquiry through isa, cast, and dyn_cast:

View File

@@ -538,9 +538,17 @@ Metadata *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) {
if (Metadata *MD = MDValuePtrs[Idx])
return MD;
// Create and return a placeholder, which will later be RAUW'd.
AnyFwdRefs = true;
// Track forward refs to be resolved later.
if (AnyFwdRefs) {
MinFwdRef = std::min(MinFwdRef, Idx);
MaxFwdRef = std::max(MaxFwdRef, Idx);
} else {
AnyFwdRefs = true;
MinFwdRef = MaxFwdRef = Idx;
}
++NumFwdRefs;
// Create and return a placeholder, which will later be RAUW'd.
Metadata *MD = MDNode::getTemporary(Context, None);
MDValuePtrs[Idx].reset(MD);
return MD;
@@ -556,11 +564,15 @@ void BitcodeReaderMDValueList::tryToResolveCycles() {
return;
// Resolve any cycles.
for (auto &MD : MDValuePtrs) {
for (unsigned I = MinFwdRef, E = MaxFwdRef + 1; I != E; ++I) {
auto &MD = MDValuePtrs[I];
assert(!(MD && isa<MDNodeFwdDecl>(MD)) && "Unexpected forward reference");
if (auto *N = dyn_cast_or_null<UniquableMDNode>(MD))
N->resolveCycles();
}
// Make sure we return early again until there's another forward ref.
AnyFwdRefs = false;
}
Type *BitcodeReader::getTypeByID(unsigned ID) {

View File

@@ -99,6 +99,8 @@ class BitcodeReaderValueList {
class BitcodeReaderMDValueList {
unsigned NumFwdRefs;
bool AnyFwdRefs;
unsigned MinFwdRef;
unsigned MaxFwdRef;
std::vector<TrackingMDRef> MDValuePtrs;
LLVMContext &Context;

View File

@@ -4842,7 +4842,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
SDValue Data = MST->getData();
SDValue Data = MST->getValue();
SDLoc DL(N);
// If the MSTORE data type requires splitting and the mask is provided by a
@@ -4885,7 +4885,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, MST->getAAInfo(), MST->getRanges());
Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, MMO);
Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
MST->isTruncatingStore());
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
@@ -4897,7 +4898,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
SecondHalfAlignment, MST->getAAInfo(),
MST->getRanges());
Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, MMO);
Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
MST->isTruncatingStore());
AddToWorklist(Lo.getNode());
AddToWorklist(Hi.getNode());
@@ -4958,7 +4960,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, MMO);
Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
ISD::NON_EXTLOAD);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
@@ -4969,7 +4972,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, MMO);
Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
ISD::NON_EXTLOAD);
AddToWorklist(Lo.getNode());
AddToWorklist(Hi.getNode());
@@ -9482,6 +9486,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
unsigned NewBW = NextPowerOf2(MSB - ShAmt);
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
// The narrowing should be profitable, the load/store operation should be
// legal (or custom) and the store size should be equal to the NewVT width.
while (NewBW < BitWidth &&
!(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
TLI.isNarrowingProfitable(VT, NewVT))) {

View File

@@ -458,16 +458,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0());
SDValue ExtMask = PromoteTargetBoolean(N->getMask(), NVT);
SDValue Mask = N->getMask();
EVT NewMaskVT = getSetCCResultType(NVT);
if (NewMaskVT != N->getMask().getValueType())
Mask = PromoteTargetBoolean(Mask, NewMaskVT);
SDLoc dl(N);
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOLoad, NVT.getStoreSize(),
N->getAlignment(), N->getAAInfo(), N->getRanges());
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
ExtMask, ExtSrc0, MMO);
Mask, ExtSrc0, N->getMemoryVT(),
N->getMemOperand(), ISD::SEXTLOAD);
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -1117,16 +1117,18 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){
assert(OpNo == 2 && "Only know how to promote the mask!");
SDValue DataOp = N->getData();
SDValue DataOp = N->getValue();
EVT DataVT = DataOp.getValueType();
SDValue Mask = N->getMask();
EVT MaskVT = Mask.getValueType();
SDLoc dl(N);
bool TruncateStore = false;
if (!TLI.isTypeLegal(DataVT)) {
if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) {
DataOp = GetPromotedInteger(DataOp);
Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
TruncateStore = true;
}
else {
assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector &&
@@ -1156,10 +1158,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){
}
else
Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType());
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
NewOps[2] = Mask;
NewOps[3] = DataOp;
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask,
N->getMemoryVT(), N->getMemOperand(),
TruncateStore);
}
SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){

View File

@@ -659,6 +659,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
SDValue WidenVecOp_Convert(SDNode *N);

View File

@@ -992,6 +992,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
SDValue Ptr = MLD->getBasePtr();
SDValue Mask = MLD->getMask();
unsigned Alignment = MLD->getOriginalAlignment();
ISD::LoadExtType ExtType = MLD->getExtensionType();
// if Alignment is equal to the vector size,
// take the half of it for the second part
@@ -1015,7 +1016,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, MMO);
Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
ExtType);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
@@ -1026,7 +1028,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, MMO);
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
ExtType);
// Build a factor node to remember that this load is independent of the
@@ -1464,7 +1467,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
SDValue Mask = N->getMask();
SDValue Data = N->getData();
SDValue Data = N->getValue();
EVT MemoryVT = N->getMemoryVT();
unsigned Alignment = N->getOriginalAlignment();
SDLoc DL(N);
@@ -1489,7 +1492,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, MMO);
Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
N->isTruncatingStore());
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
@@ -1500,7 +1504,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
SecondHalfAlignment, N->getAAInfo(), N->getRanges());
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, MMO);
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
N->isTruncatingStore());
// Build a factor node to remember that this store is independent of the
@@ -2412,6 +2417,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
SDValue Mask = N->getMask();
EVT MaskVT = Mask.getValueType();
SDValue Src0 = GetWidenedVector(N->getSrc0());
ISD::LoadExtType ExtType = N->getExtensionType();
SDLoc dl(N);
if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
@@ -2434,14 +2440,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
}
// Rebuild memory operand because MemoryVT was changed
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOLoad, WidenVT.getStoreSize(),
N->getAlignment(), N->getAAInfo(), N->getRanges());
SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
Mask, Src0, MMO);
Mask, Src0, N->getMemoryVT(),
N->getMemOperand(), ExtType);
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -2593,6 +2594,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
case ISD::ANY_EXTEND:
@@ -2791,6 +2793,42 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
}
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
EVT MaskVT = Mask.getValueType();
SDValue StVal = MST->getValue();
// Widen the value
SDValue WideVal = GetWidenedVector(StVal);
SDLoc dl(N);
if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
Mask = GetWidenedVector(Mask);
else {
// The mask should be widened as well
EVT BoolVT = getSetCCResultType(WideVal.getValueType());
// We can't use ModifyToType() because we should fill the mask with
// zeroes
unsigned WidenNumElts = BoolVT.getVectorNumElements();
unsigned MaskNumElts = MaskVT.getVectorNumElements();
unsigned NumConcat = WidenNumElts / MaskNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
SDValue ZeroVal = DAG.getConstant(0, MaskVT);
Ops[0] = Mask;
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = ZeroVal;
Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
}
assert(Mask.getValueType().getVectorNumElements() ==
WideVal.getValueType().getVectorNumElements() &&
"Mask and data vectors should have the same number of elements");
return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
Mask, MST->getMemoryVT(), MST->getMemOperand(),
false);
}
SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
SDValue InOp0 = GetWidenedVector(N->getOperand(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(1));

View File

@@ -4924,15 +4924,15 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
SDValue
SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
SDValue Ptr, SDValue Mask, SDValue Src0,
MachineMemOperand *MMO) {
SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT,
MachineMemOperand *MMO, ISD::LoadExtType ExtTy) {
SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED,
ID.AddInteger(encodeMemSDNodeFlags(ExtTy, ISD::UNINDEXED,
MMO->isVolatile(),
MMO->isNonTemporal(),
MMO->isInvariant()));
@@ -4944,14 +4944,15 @@ SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
}
SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(),
dl.getDebugLoc(), Ops, 4, VTs,
VT, MMO);
ExtTy, MemVT, MMO);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
SDValue Ptr, SDValue Mask, MachineMemOperand *MMO) {
SDValue Ptr, SDValue Mask, EVT MemVT,
MachineMemOperand *MMO, bool isTrunc) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
EVT VT = Val.getValueType();
@@ -4970,7 +4971,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
}
SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(),
dl.getDebugLoc(), Ops, 4,
VTs, VT, MMO);
VTs, isTrunc, MemVT, MMO);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);

View File

@@ -3667,7 +3667,8 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
getMachineMemOperand(MachinePointerInfo(PtrOperand),
MachineMemOperand::MOStore, VT.getStoreSize(),
Alignment, AAInfo);
SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, MMO);
SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
MMO, false);
DAG.setRoot(StoreNode);
setValue(&I, StoreNode);
}
@@ -3706,7 +3707,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
MachineMemOperand::MOLoad, VT.getStoreSize(),
Alignment, AAInfo, Ranges);
SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, MMO);
SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
ISD::NON_EXTLOAD);
SDValue OutChain = Load.getValue(1);
DAG.setRoot(OutChain);
setValue(&I, Load);

View File

@@ -4,7 +4,6 @@ add_llvm_library(LLVMExecutionEngine
ExecutionEngine.cpp
ExecutionEngineBindings.cpp
GDBRegistrationListener.cpp
RTDyldMemoryManager.cpp
TargetSelect.cpp
)

View File

@@ -22,4 +22,4 @@ subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT
type = Library
name = ExecutionEngine
parent = Libraries
required_libraries = Core MC Object Support
required_libraries = Core MC Object Support RuntimeDyld

View File

@@ -1,4 +1,5 @@
add_llvm_library(LLVMRuntimeDyld
RTDyldMemoryManager.cpp
RuntimeDyld.cpp
RuntimeDyldChecker.cpp
RuntimeDyldELF.cpp

View File

@@ -257,11 +257,11 @@ Constant *Constant::getAggregateElement(unsigned Elt) const {
if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
return Elt < CV->getNumOperands() ? CV->getOperand(Elt) : nullptr;
if (const ConstantAggregateZero *CAZ =dyn_cast<ConstantAggregateZero>(this))
return CAZ->getElementValue(Elt);
if (const ConstantAggregateZero *CAZ = dyn_cast<ConstantAggregateZero>(this))
return Elt < CAZ->getNumElements() ? CAZ->getElementValue(Elt) : nullptr;
if (const UndefValue *UV = dyn_cast<UndefValue>(this))
return UV->getElementValue(Elt);
return Elt < UV->getNumElements() ? UV->getElementValue(Elt) : nullptr;
if (const ConstantDataSequential *CDS =dyn_cast<ConstantDataSequential>(this))
return Elt < CDS->getNumElements() ? CDS->getElementAsConstant(Elt)
@@ -764,6 +764,14 @@ Constant *ConstantAggregateZero::getElementValue(unsigned Idx) const {
return getStructElement(Idx);
}
unsigned ConstantAggregateZero::getNumElements() const {
const Type *Ty = getType();
if (const auto *AT = dyn_cast<ArrayType>(Ty))
return AT->getNumElements();
if (const auto *VT = dyn_cast<VectorType>(Ty))
return VT->getNumElements();
return Ty->getStructNumElements();
}
//===----------------------------------------------------------------------===//
// UndefValue Implementation
@@ -797,7 +805,14 @@ UndefValue *UndefValue::getElementValue(unsigned Idx) const {
return getStructElement(Idx);
}
unsigned UndefValue::getNumElements() const {
const Type *Ty = getType();
if (const auto *AT = dyn_cast<ArrayType>(Ty))
return AT->getNumElements();
if (const auto *VT = dyn_cast<VectorType>(Ty))
return VT->getNumElements();
return Ty->getStructNumElements();
}
//===----------------------------------------------------------------------===//
// ConstantXXX Classes

View File

@@ -1679,7 +1679,9 @@ void X86TargetLowering::resetOperationActions() {
setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::MLOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::MSTORE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
@@ -24738,6 +24740,166 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
/// PerformMLOADCombine - Resolve extending loads
static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
MaskedLoadSDNode *Mld = cast<MaskedLoadSDNode>(N);
if (Mld->getExtensionType() != ISD::SEXTLOAD)
return SDValue();
EVT VT = Mld->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
EVT LdVT = Mld->getMemoryVT();
SDLoc dl(Mld);
assert(LdVT != VT && "Cannot extend to the same type");
unsigned ToSz = VT.getVectorElementType().getSizeInBits();
unsigned FromSz = LdVT.getVectorElementType().getSizeInBits();
// From, To sizes and ElemCount must be pow of two
assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
"Unexpected size for extending masked load");
unsigned SizeRatio = ToSz / FromSz;
assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits());
// Create a type on which we perform the shuffle
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
LdVT.getScalarType(), NumElems*SizeRatio);
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
// Convert Src0 value
SDValue WideSrc0 = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mld->getSrc0());
if (Mld->getSrc0().getOpcode() != ISD::UNDEF) {
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
// Can't shuffle using an illegal type.
assert (TLI.isTypeLegal(WideVecVT) && "WideVecVT should be legal");
WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0,
DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
}
// Prepare the new mask
SDValue NewMask;
SDValue Mask = Mld->getMask();
if (Mask.getValueType() == VT) {
// Mask and original value have the same type
NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
ShuffleVec[i] = NumElems*SizeRatio;
NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
DAG.getConstant(0, WideVecVT),
&ShuffleVec[0]);
}
else {
assert(Mask.getValueType().getVectorElementType() == MVT::i1);
unsigned WidenNumElts = NumElems*SizeRatio;
unsigned MaskNumElts = VT.getVectorNumElements();
EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
WidenNumElts);
unsigned NumConcat = WidenNumElts / MaskNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType());
Ops[0] = Mask;
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = ZeroVal;
NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
}
SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(),
Mld->getBasePtr(), NewMask, WideSrc0,
Mld->getMemoryVT(), Mld->getMemOperand(),
ISD::NON_EXTLOAD);
SDValue NewVec = DAG.getNode(X86ISD::VSEXT, dl, VT, WideLd);
return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
}
/// PerformMSTORECombine - Resolve truncating stores
static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
if (!Mst->isTruncatingStore())
return SDValue();
EVT VT = Mst->getValue().getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
EVT StVT = Mst->getMemoryVT();
SDLoc dl(Mst);
assert(StVT != VT && "Cannot truncate to the same type");
unsigned FromSz = VT.getVectorElementType().getSizeInBits();
unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
// From, To sizes and ElemCount must be pow of two
assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
"Unexpected size for truncating masked store");
// We are going to use the original vector elt for storing.
// Accumulated smaller vector elements must be a multiple of the store size.
assert (((NumElems * FromSz) % ToSz) == 0 &&
"Unexpected ratio for truncating masked store");
unsigned SizeRatio = FromSz / ToSz;
assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
// Create a type on which we perform the shuffle
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
StVT.getScalarType(), NumElems*SizeRatio);
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mst->getValue());
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
// Can't shuffle using an illegal type.
assert (TLI.isTypeLegal(WideVecVT) && "WideVecVT should be legal");
SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
DAG.getUNDEF(WideVecVT),
&ShuffleVec[0]);
SDValue NewMask;
SDValue Mask = Mst->getMask();
if (Mask.getValueType() == VT) {
// Mask and original value have the same type
NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
ShuffleVec[i] = NumElems*SizeRatio;
NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
DAG.getConstant(0, WideVecVT),
&ShuffleVec[0]);
}
else {
assert(Mask.getValueType().getVectorElementType() == MVT::i1);
unsigned WidenNumElts = NumElems*SizeRatio;
unsigned MaskNumElts = VT.getVectorNumElements();
EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
WidenNumElts);
unsigned NumConcat = WidenNumElts / MaskNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);
SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType());
Ops[0] = Mask;
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = ZeroVal;
NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
}
return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal, Mst->getBasePtr(),
NewMask, StVT, Mst->getMemOperand(), false);
}
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
@@ -25836,7 +25998,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget);
case ISD::MLOAD: return PerformMLOADCombine(N, DAG, DCI, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case ISD::MSTORE: return PerformMSTORECombine(N, DAG, Subtarget);
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);

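To make the index arithmetic shared by PerformMLOADCombine and PerformMSTORECombine above concrete, here is a standalone worked example. The types (VT = v8i32 sign-extended from LdVT = v8i16) are illustrative choices; the real code derives all sizes from the node's EVTs:

#include <cstdio>
#include <vector>

// Worked numbers for the extending-load combine: a sign-extending masked
// load of v8i16 into v8i32 becomes a non-extending v16i16 masked load
// followed by X86ISD::VSEXT.
int main() {
  unsigned NumElems = 8;                      // elements of VT
  unsigned ToSz = 32, FromSz = 16;            // element bits of VT and LdVT
  unsigned SizeRatio = ToSz / FromSz;         // 2
  unsigned WideElems = NumElems * SizeRatio;  // WideVecVT = v16i16
  // Mask shuffle: result lane i takes input lane i*SizeRatio for
  // i < NumElems; the remaining lanes take index WideElems, i.e. lane 0
  // of the all-zero second operand, so no widened lane is ever enabled.
  std::vector<int> ShuffleVec(WideElems);
  for (unsigned i = 0; i != WideElems; ++i)
    ShuffleVec[i] = i < NumElems ? int(i * SizeRatio) : int(WideElems);
  for (int m : ShuffleVec)
    printf("%d ", m);  // 0 2 4 6 8 10 12 14 16 16 16 16 16 16 16 16
  printf("\n");
  return 0;
}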
View File

@@ -403,7 +403,7 @@ bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction& Start,
const Instruction& End,
AliasAnalysis::Location
Loc) {
return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::Ref);
return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::ModRef);
}
///
@@ -414,6 +414,7 @@ bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction& Start,
StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
StoreInst *Store0) {
DEBUG(dbgs() << "can Sink? : "; Store0->dump(); dbgs() << "\n");
BasicBlock *BB0 = Store0->getParent();
for (BasicBlock::reverse_iterator RBI = BB1->rbegin(), RBE = BB1->rend();
RBI != RBE; ++RBI) {
Instruction *Inst = &*RBI;
@@ -422,13 +423,14 @@ StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
continue;
StoreInst *Store1 = cast<StoreInst>(Inst);
BasicBlock *BB0 = Store0->getParent();
AliasAnalysis::Location Loc0 = AA->getLocation(Store0);
AliasAnalysis::Location Loc1 = AA->getLocation(Store1);
if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
!isStoreSinkBarrierInRange(*Store1, BB1->back(), Loc1) &&
!isStoreSinkBarrierInRange(*Store0, BB0->back(), Loc0)) {
!isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store1))),
BB1->back(), Loc1) &&
!isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store0))),
BB0->back(), Loc0)) {
return Store1;
}
}

View File

@@ -55,7 +55,7 @@ STATISTIC(NumRuntimeUnrolled,
/// - Branch around the original loop if the trip count is less
/// than the unroll factor.
///
static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
BasicBlock *OrigPH, BasicBlock *NewPH,
ValueToValueMapTy &VMap, Pass *P) {
@@ -105,12 +105,19 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
}
}
// Create a branch around the original loop, which is taken if the
// trip count is less than the unroll factor.
// Create a branch around the original loop, which is taken if there are no
// iterations remaining to be executed after running the prologue.
Instruction *InsertPt = PrologEnd->getTerminator();
assert(Count != 0 && "nonsensical Count!");
// If BECount <u (Count - 1) then (BECount + 1) & (Count - 1) == (BECount + 1)
// (since Count is a power of 2). This means %xtraiter is (BECount + 1) and
// all of the iterations of this loop were executed by the prologue. Note
// that if BECount <u (Count - 1) then (BECount + 1) cannot unsigned-overflow.
Instruction *BrLoopExit =
new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
ConstantInt::get(TripCount->getType(), Count));
new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, BECount,
ConstantInt::get(BECount->getType(), Count - 1));
BasicBlock *Exit = L->getUniqueExitBlock();
assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
@@ -292,23 +299,28 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
// Only unroll loops with a computable trip count and the trip count needs
// to be an int value (allowing a pointer type is a TODO item)
const SCEV *BECount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
const SCEV *BECountSC = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BECountSC) ||
!BECountSC->getType()->isIntegerTy())
return false;
// If BECount is INT_MAX, we can't compute trip-count without overflow.
if (BECount->isAllOnesValue())
return false;
unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
// Add 1 since the backedge count doesn't include the first loop iteration
const SCEV *TripCountSC =
SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
if (isa<SCEVCouldNotCompute>(TripCountSC))
return false;
// We only handle cases when the unroll factor is a power of 2.
// Count is the loop unroll factor, the number of extra copies added + 1.
if ((Count & (Count-1)) != 0)
if (!isPowerOf2_32(Count))
return false;
// This constraint lets us deal with an overflowing trip count easily; see the
// comment on ModVal below. This check is equivalent to `Log2(Count) <
// BEWidth`.
if (static_cast<uint64_t>(Count) > (1ULL << BEWidth))
return false;
// If this loop is nested, then the loop unroller changes the code in
@@ -330,16 +342,23 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
SCEVExpander Expander(*SE, "loop-unroll");
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
PreHeaderBR);
Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
PreHeaderBR);
IRBuilder<> B(PreHeaderBR);
Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
// Check if for no extra iterations, then jump to cloned/unrolled loop.
// We have to check that the trip count computation didn't overflow when
// adding one to the backedge taken count.
Value *LCmp = B.CreateIsNotNull(ModVal, "lcmp.mod");
Value *OverflowCheck = B.CreateIsNull(TripCount, "lcmp.overflow");
Value *BranchVal = B.CreateOr(OverflowCheck, LCmp, "lcmp.or");
// If ModVal is zero, we know that either
// 1. there are no iterations to be run in the prologue loop
// OR
// 2. the addition computing TripCount overflowed
//
// If (2) is true, we know that TripCount really is (1 << BEWidth) and so the
// number of iterations that remain to be run in the original loop is a
// multiple of Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
// explicitly check this above).
Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");
// Branch to either the extra iterations or the cloned/unrolled loop
// We will fix up the true branch label when adding loop body copies
@@ -362,10 +381,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
std::vector<BasicBlock *> NewBlocks;
ValueToValueMapTy VMap;
// If unroll count is 2 and we can't overflow in tripcount computation (which
// is BECount + 1), then we don't need a loop for prologue, and we can unroll
// it. We can be sure that we don't overflow only if tripcount is a constant.
bool UnrollPrologue = (Count == 2 && isa<ConstantInt>(TripCount));
bool UnrollPrologue = Count == 2;
// Clone all the basic blocks in the loop. If Count is 2, we don't clone
// the loop, otherwise we create a cloned loop to execute the extra
@@ -391,7 +407,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
// Connect the prolog code to the original loop and update the
// PHI functions.
BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, VMap,
ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap,
LPM->getAsPass());
NumRuntimeUnrolled++;
return true;

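The overflow reasoning in the comment blocks above reduces to two facts about unsigned arithmetic. This standalone sketch checks both; the 32-bit backedge-count width and Count = 4 are illustrative choices, not values from the pass:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Count = 4;  // unroll factor: a power of 2, Log2(Count) <= 32
  // Fact 1: if BECount <u (Count - 1), then
  // (BECount + 1) & (Count - 1) == BECount + 1, so %xtraiter equals the
  // whole trip count and the prologue executes every iteration.
  for (uint32_t BECount = 0; BECount < Count - 1; ++BECount)
    assert(((BECount + 1) & (Count - 1)) == BECount + 1);
  // Fact 2: if BECount is all-ones, TripCount = BECount + 1 wraps to 0 and
  // %xtraiter is 0; the true trip count 2^32 is then a multiple of Count,
  // since Log2(Count) <= BEWidth (the constraint the pass checks above).
  uint32_t BECount = UINT32_MAX;
  uint32_t TripCount = BECount + 1;  // well-defined unsigned wrap to 0
  assert(TripCount == 0 && (TripCount & (Count - 1)) == 0);
  assert((1ULL << 32) % Count == 0);
  return 0;
}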
View File

@@ -1874,6 +1874,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
// wide store needs to start at the last vector element.
PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
Mask[Part] = reverseVector(Mask[Part]);
}
Value *VecPtr = Builder.CreateBitCast(PartPtr,
@@ -1902,6 +1903,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
// wide load needs to start at the last vector element.
PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
Mask[Part] = reverseVector(Mask[Part]);
}
Instruction* NewLI;

View File

@@ -159,7 +159,7 @@ define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
}
; AVX2-LABEL: test15
; AVX2: vpmaskmovq
; AVX2: vpmaskmovd
define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
@@ -176,8 +176,9 @@ define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %
}
; AVX2-LABEL: test17
; AVX2: vpmaskmovq
; AVX2: vblendvpd
; AVX2: vpmaskmovd
; AVX2: vblendvps
; AVX2: vpmovsxdq
define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
%res = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)

View File

@@ -0,0 +1,114 @@
; ModuleID = 'bug.c'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; RUN: opt -O2 -S < %s | FileCheck %s
; CHECK-LABEL: main
; CHECK: if.end
; CHECK: store
; CHECK: memset
; CHECK: if.then
; CHECK: store
; CHECK: memset
@d = common global i32 0, align 4
@b = common global i32 0, align 4
@f = common global [1 x [3 x i8]] zeroinitializer, align 1
@e = common global i32 0, align 4
@c = common global i32 0, align 4
@a = common global i32 0, align 4
; Function Attrs: nounwind uwtable
define void @fn1() #0 {
entry:
store i32 0, i32* @d, align 4
br label %for.cond
for.cond: ; preds = %for.inc8, %entry
%0 = load i32* @d, align 4
%cmp = icmp slt i32 %0, 2
br i1 %cmp, label %for.body, label %for.end10
for.body: ; preds = %for.cond
%1 = load i32* @d, align 4
%idxprom = sext i32 %1 to i64
%2 = load i32* @b, align 4
%idxprom1 = sext i32 %2 to i64
%arrayidx = getelementptr inbounds [1 x [3 x i8]]* @f, i32 0, i64 %idxprom1
%arrayidx2 = getelementptr inbounds [3 x i8]* %arrayidx, i32 0, i64 %idxprom
store i8 0, i8* %arrayidx2, align 1
store i32 0, i32* @e, align 4
br label %for.cond3
for.cond3: ; preds = %for.inc, %for.body
%3 = load i32* @e, align 4
%cmp4 = icmp slt i32 %3, 3
br i1 %cmp4, label %for.body5, label %for.end
for.body5: ; preds = %for.cond3
%4 = load i32* @c, align 4
%tobool = icmp ne i32 %4, 0
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %for.body5
%5 = load i32* @a, align 4
%dec = add nsw i32 %5, -1
store i32 %dec, i32* @a, align 4
br label %if.end
if.end: ; preds = %if.then, %for.body5
%6 = load i32* @e, align 4
%idxprom6 = sext i32 %6 to i64
%arrayidx7 = getelementptr inbounds [3 x i8]* getelementptr inbounds ([1 x [3 x i8]]* @f, i32 0, i64 0), i32 0, i64 %idxprom6
store i8 1, i8* %arrayidx7, align 1
br label %for.inc
for.inc: ; preds = %if.end
%7 = load i32* @e, align 4
%inc = add nsw i32 %7, 1
store i32 %inc, i32* @e, align 4
br label %for.cond3
for.end: ; preds = %for.cond3
br label %for.inc8
for.inc8: ; preds = %for.end
%8 = load i32* @d, align 4
%inc9 = add nsw i32 %8, 1
store i32 %inc9, i32* @d, align 4
br label %for.cond
for.end10: ; preds = %for.cond
ret void
}
; Function Attrs: nounwind uwtable
define i32 @main() #0 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
call void @fn1()
%0 = load i8* getelementptr inbounds ([1 x [3 x i8]]* @f, i32 0, i64 0, i64 1), align 1
%conv = sext i8 %0 to i32
%cmp = icmp ne i32 %conv, 1
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
call void @abort() #2
unreachable
if.end: ; preds = %entry
ret i32 0
}
; Function Attrs: noreturn nounwind
declare void @abort() #1
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { noreturn nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { noreturn nounwind }
!llvm.ident = !{!0}
!0 = !{!"clang version 3.7.0 (trunk 229288) (llvm/trunk 229286:229290M)"}

View File

@@ -0,0 +1,19 @@
; RUN: opt < %s -instsimplify -S | FileCheck %s
@zeroinit = constant {} zeroinitializer
@undef = constant {} undef
define i32 @crash_on_zeroinit() {
; CHECK-LABEL: @crash_on_zeroinit
; CHECK: ret i32 0
%load = load i32* bitcast ({}* @zeroinit to i32*)
ret i32 %load
}
define i32 @crash_on_undef() {
; CHECK-LABEL: @crash_on_undef
; CHECK: ret i32 undef
%load = load i32* bitcast ({}* @undef to i32*)
ret i32 %load
}

View File

@@ -4,9 +4,7 @@
; CHECK: %xtraiter = and i32 %n
; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0
; CHECK: %lcmp.overflow = icmp eq i32 %n, 0
; CHECK: %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod
; CHECK: br i1 %lcmp.or, label %for.body.prol, label %for.body.preheader.split
; CHECK: br i1 %lcmp.mod, label %for.body.prol, label %for.body.preheader.split
; CHECK: for.body.prol:
; CHECK: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.preheader ]

View File

@@ -3,7 +3,7 @@
; This tests that setting the unroll count works
; CHECK: for.body.prol:
; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split
; CHECK: br label %for.body.preheader.split
; CHECK: for.body:
; CHECK: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body
; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body

View File

@@ -1,19 +1,28 @@
; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; When prologue is fully unrolled, the branch on its end is unconditional.
; Unrolling it is illegal if we can't prove that trip-count+1 doesn't overflow,
; like in this example, where it comes from an argument.
;
; This test is based on an example from here:
; http://stackoverflow.com/questions/23838661/why-is-clang-optimizing-this-code-out
;
; This test case documents how runtime loop unrolling handles the case
; when the backedge-count is -1.
; If %N, the backedge-taken count, is -1 then %0 unsigned-overflows
; and is 0. %xtraiter too is 0, signifying that the total trip-count
; is divisible by 2. The prologue then branches to the unrolled loop
; and executes the 2^32 iterations there, in groups of 2.
; CHECK: entry:
; CHECK-NEXT: %0 = add i32 %N, 1
; CHECK-NEXT: %xtraiter = and i32 %0, 1
; CHECK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0
; CHECK-NEXT: br i1 %lcmp.mod, label %while.body.prol, label %entry.split
; CHECK: while.body.prol:
; CHECK: br i1
; CHECK: br label %entry.split
; CHECK: entry.split:
; Function Attrs: nounwind readnone ssp uwtable
define i32 @foo(i32 %N) #0 {
define i32 @foo(i32 %N) {
entry:
br label %while.body
@@ -26,5 +35,3 @@ while.body: ; preds = %while.body, %entry
while.end: ; preds = %while.body
ret i32 %i
}
attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

View File

@@ -418,3 +418,85 @@ for.end: ; preds = %for.cond
ret void
}
; Reverse loop
;void foo6(double *in, double *out, unsigned size, int *trigger) {
;
; for (int i=SIZE-1; i>=0; i--) {
; if (trigger[i] > 0) {
; out[i] = in[i] + (double) 0.5;
; }
; }
;}
;AVX2-LABEL: @foo6
;AVX2: icmp sgt <4 x i32> %reverse, zeroinitializer
;AVX2: shufflevector <4 x i1>{{.*}}<4 x i32> <i32 3, i32 2, i32 1, i32 0>
;AVX2: call <4 x double> @llvm.masked.load.v4f64
;AVX2: fadd <4 x double>
;AVX2: call void @llvm.masked.store.v4f64
;AVX2: ret void
;AVX512-LABEL: @foo6
;AVX512: icmp sgt <8 x i32> %reverse, zeroinitializer
;AVX512: shufflevector <8 x i1>{{.*}}<8 x i32> <i32 7, i32 6, i32 5, i32 4
;AVX512: call <8 x double> @llvm.masked.load.v8f64
;AVX512: fadd <8 x double>
;AVX512: call void @llvm.masked.store.v8f64
;AVX512: ret void
define void @foo6(double* %in, double* %out, i32 %size, i32* %trigger) {
entry:
%in.addr = alloca double*, align 8
%out.addr = alloca double*, align 8
%size.addr = alloca i32, align 4
%trigger.addr = alloca i32*, align 8
%i = alloca i32, align 4
store double* %in, double** %in.addr, align 8
store double* %out, double** %out.addr, align 8
store i32 %size, i32* %size.addr, align 4
store i32* %trigger, i32** %trigger.addr, align 8
store i32 4095, i32* %i, align 4
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%0 = load i32* %i, align 4
%cmp = icmp sge i32 %0, 0
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
%1 = load i32* %i, align 4
%idxprom = sext i32 %1 to i64
%2 = load i32** %trigger.addr, align 8
%arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
%3 = load i32* %arrayidx, align 4
%cmp1 = icmp sgt i32 %3, 0
br i1 %cmp1, label %if.then, label %if.end
if.then: ; preds = %for.body
%4 = load i32* %i, align 4
%idxprom2 = sext i32 %4 to i64
%5 = load double** %in.addr, align 8
%arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2
%6 = load double* %arrayidx3, align 8
%add = fadd double %6, 5.000000e-01
%7 = load i32* %i, align 4
%idxprom4 = sext i32 %7 to i64
%8 = load double** %out.addr, align 8
%arrayidx5 = getelementptr inbounds double* %8, i64 %idxprom4
store double %add, double* %arrayidx5, align 8
br label %if.end
if.end: ; preds = %if.then, %for.body
br label %for.inc
for.inc: ; preds = %if.end
%9 = load i32* %i, align 4
%dec = add nsw i32 %9, -1
store i32 %dec, i32* %i, align 4
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@@ -10,6 +10,7 @@ set(LLVM_LINK_COMPONENTS
MC
MCJIT
Object
RuntimeDyld
SelectionDAG
Support
native

View File

@@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS
ExecutionEngine
Interpreter
MC
RuntimeDyld
Support
)

View File

@@ -5,6 +5,7 @@ set(LLVM_LINK_COMPONENTS
IPO
MC
MCJIT
RuntimeDyld
ScalarOpts
Support
Target