Vendor import of llvm RELEASE_360/rc4 tag r229772 (effectively, 3.6.0 RC4):
https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_360/rc4@229772
This commit is contained in:
parent
85d2764eab
commit
49b6407b6c
@ -5,11 +5,6 @@ LLVM 3.6 Release Notes
|
||||
.. contents::
|
||||
:local:
|
||||
|
||||
.. warning::
|
||||
These are in-progress notes for the upcoming LLVM 3.6 release. You may
|
||||
prefer the `LLVM 3.5 Release Notes <http://llvm.org/releases/3.5.0/docs
|
||||
/ReleaseNotes.html>`_.
|
||||
|
||||
|
||||
Introduction
|
||||
============
|
||||
@ -26,10 +21,6 @@ have questions or comments, the `LLVM Developer's Mailing List
|
||||
<http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ is a good place to send
|
||||
them.
|
||||
|
||||
Note that if you are reading this file from a Subversion checkout or the main
|
||||
LLVM web page, this document applies to the *next* release, not the current
|
||||
one. To see the release notes for a specific release, please see the `releases
|
||||
page <http://llvm.org/releases/>`_.
|
||||
|
||||
Non-comprehensive list of changes in this release
|
||||
=================================================
|
||||
@ -544,6 +535,33 @@ new LLVM-based code generators "on the fly" for the designed processors and
|
||||
loads them into the compiler backend as runtime libraries to avoid
|
||||
per-target recompilation of larger parts of the compiler chain.
|
||||
|
||||
Likely
|
||||
------
|
||||
|
||||
`Likely <http://www.liblikely.org>`_ is an embeddable just-in-time Lisp for
|
||||
image recognition and heterogeneous computing. Algorithms are just-in-time
|
||||
compiled using LLVM's MCJIT infrastructure to execute on single or
|
||||
multi-threaded CPUs and potentially OpenCL SPIR or CUDA enabled GPUs.
|
||||
Likely seeks to explore new optimizations for statistical learning
|
||||
algorithms by moving them from an offline model generation step to the
|
||||
compile-time evaluation of a function (the learning algorithm) with constant
|
||||
arguments (the training data).
|
||||
|
||||
LDC - the LLVM-based D compiler
|
||||
-------------------------------
|
||||
|
||||
`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It
|
||||
pragmatically combines efficiency, control, and modeling power, with safety and
|
||||
programmer productivity. D supports powerful concepts like Compile-Time Function
|
||||
Execution (CTFE) and Template Meta-Programming, provides an innovative approach
|
||||
to concurrency and offers many classical paradigms.
|
||||
|
||||
`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler
|
||||
combined with LLVM as backend to produce efficient native code. LDC targets
|
||||
x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on
|
||||
PowerPC (32/64 bit). Ports to other architectures like ARM, AArch64 and MIPS64
|
||||
are underway.
|
||||
|
||||
Additional Information
|
||||
======================
|
||||
|
||||
|
@ -1,11 +1,6 @@
|
||||
Overview
|
||||
========
|
||||
|
||||
.. warning::
|
||||
|
||||
If you are using a released version of LLVM, see `the download page
|
||||
<http://llvm.org/releases/>`_ to find your documentation.
|
||||
|
||||
The LLVM compiler infrastructure supports a wide range of projects, from
|
||||
industrial strength compilers to specialized JIT applications to small
|
||||
research projects.
|
||||
|
@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS
|
||||
ExecutionEngine
|
||||
InstCombine
|
||||
MC
|
||||
RuntimeDyld
|
||||
ScalarOpts
|
||||
Support
|
||||
native
|
||||
|
@ -867,9 +867,11 @@ class SelectionDAG {
|
||||
SDValue Offset, ISD::MemIndexedMode AM);
|
||||
|
||||
SDValue getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr,
|
||||
SDValue Mask, SDValue Src0, MachineMemOperand *MMO);
|
||||
SDValue Mask, SDValue Src0, EVT MemVT,
|
||||
MachineMemOperand *MMO, ISD::LoadExtType);
|
||||
SDValue getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
|
||||
SDValue Ptr, SDValue Mask, MachineMemOperand *MMO);
|
||||
SDValue Ptr, SDValue Mask, EVT MemVT,
|
||||
MachineMemOperand *MMO, bool IsTrunc);
|
||||
/// getSrcValue - Construct a node to track a Value* through the backend.
|
||||
SDValue getSrcValue(const Value *v);
|
||||
|
||||
|
@ -1970,13 +1970,17 @@ class MaskedLoadStoreSDNode : public MemSDNode {
|
||||
class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
|
||||
public:
|
||||
friend class SelectionDAG;
|
||||
MaskedLoadSDNode(unsigned Order, DebugLoc dl,
|
||||
SDValue *Operands, unsigned numOperands,
|
||||
SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
|
||||
MaskedLoadSDNode(unsigned Order, DebugLoc dl, SDValue *Operands,
|
||||
unsigned numOperands, SDVTList VTs, ISD::LoadExtType ETy,
|
||||
EVT MemVT, MachineMemOperand *MMO)
|
||||
: MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, Operands, numOperands,
|
||||
VTs, MemVT, MMO)
|
||||
{}
|
||||
VTs, MemVT, MMO) {
|
||||
SubclassData |= (unsigned short)ETy;
|
||||
}
|
||||
|
||||
ISD::LoadExtType getExtensionType() const {
|
||||
return ISD::LoadExtType(SubclassData & 3);
|
||||
}
|
||||
const SDValue &getSrc0() const { return getOperand(3); }
|
||||
static bool classof(const SDNode *N) {
|
||||
return N->getOpcode() == ISD::MLOAD;
|
||||
@ -1989,14 +1993,19 @@ class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
|
||||
|
||||
public:
|
||||
friend class SelectionDAG;
|
||||
MaskedStoreSDNode(unsigned Order, DebugLoc dl,
|
||||
SDValue *Operands, unsigned numOperands,
|
||||
SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
|
||||
MaskedStoreSDNode(unsigned Order, DebugLoc dl, SDValue *Operands,
|
||||
unsigned numOperands, SDVTList VTs, bool isTrunc, EVT MemVT,
|
||||
MachineMemOperand *MMO)
|
||||
: MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, Operands, numOperands,
|
||||
VTs, MemVT, MMO)
|
||||
{}
|
||||
VTs, MemVT, MMO) {
|
||||
SubclassData |= (unsigned short)isTrunc;
|
||||
}
|
||||
/// isTruncatingStore - Return true if the op does a truncation before store.
|
||||
/// For integers this is the same as doing a TRUNCATE and storing the result.
|
||||
/// For floats, it is the same as doing an FP_ROUND and storing the result.
|
||||
bool isTruncatingStore() const { return SubclassData & 1; }
|
||||
|
||||
const SDValue &getData() const { return getOperand(3); }
|
||||
const SDValue &getValue() const { return getOperand(3); }
|
||||
|
||||
static bool classof(const SDNode *N) {
|
||||
return N->getOpcode() == ISD::MSTORE;
|
||||
|
@ -6,9 +6,6 @@
|
||||
/* Exported configuration */
|
||||
#include "llvm/Config/llvm-config.h"
|
||||
|
||||
/* Patch version of the LLVM API */
|
||||
#cmakedefine LLVM_VERSION_PATCH ${LLVM_VERSION_PATCH}
|
||||
|
||||
/* Bug report URL. */
|
||||
#define BUG_REPORT_URL "${BUG_REPORT_URL}"
|
||||
|
||||
|
@ -87,10 +87,13 @@
|
||||
#cmakedefine LLVM_USE_OPROFILE 1
|
||||
|
||||
/* Major version of the LLVM API */
|
||||
#cmakedefine LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR}
|
||||
#define LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR}
|
||||
|
||||
/* Minor version of the LLVM API */
|
||||
#cmakedefine LLVM_VERSION_MINOR ${LLVM_VERSION_MINOR}
|
||||
#define LLVM_VERSION_MINOR ${LLVM_VERSION_MINOR}
|
||||
|
||||
/* Patch version of the LLVM API */
|
||||
#define LLVM_VERSION_PATCH ${LLVM_VERSION_PATCH}
|
||||
|
||||
/* LLVM version string */
|
||||
#define LLVM_VERSION_STRING "${PACKAGE_VERSION}"
|
||||
|
@ -92,6 +92,9 @@
|
||||
/* Minor version of the LLVM API */
|
||||
#undef LLVM_VERSION_MINOR
|
||||
|
||||
/* Patch version of the LLVM API */
|
||||
#undef LLVM_VERSION_PATCH
|
||||
|
||||
/* LLVM version string */
|
||||
#undef LLVM_VERSION_STRING
|
||||
|
||||
|
@ -325,6 +325,9 @@ class ConstantAggregateZero : public Constant {
|
||||
/// index.
|
||||
Constant *getElementValue(unsigned Idx) const;
|
||||
|
||||
/// \brief Return the number of elements in the array, vector, or struct.
|
||||
unsigned getNumElements() const;
|
||||
|
||||
/// Methods for support type inquiry through isa, cast, and dyn_cast:
|
||||
///
|
||||
static bool classof(const Value *V) {
|
||||
@ -1196,6 +1199,9 @@ class UndefValue : public Constant {
|
||||
/// index.
|
||||
UndefValue *getElementValue(unsigned Idx) const;
|
||||
|
||||
/// \brief Return the number of elements in the array, vector, or struct.
|
||||
unsigned getNumElements() const;
|
||||
|
||||
void destroyConstant() override;
|
||||
|
||||
/// Methods for support type inquiry through isa, cast, and dyn_cast:
|
||||
|
@ -538,9 +538,17 @@ Metadata *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) {
|
||||
if (Metadata *MD = MDValuePtrs[Idx])
|
||||
return MD;
|
||||
|
||||
// Create and return a placeholder, which will later be RAUW'd.
|
||||
AnyFwdRefs = true;
|
||||
// Track forward refs to be resolved later.
|
||||
if (AnyFwdRefs) {
|
||||
MinFwdRef = std::min(MinFwdRef, Idx);
|
||||
MaxFwdRef = std::max(MaxFwdRef, Idx);
|
||||
} else {
|
||||
AnyFwdRefs = true;
|
||||
MinFwdRef = MaxFwdRef = Idx;
|
||||
}
|
||||
++NumFwdRefs;
|
||||
|
||||
// Create and return a placeholder, which will later be RAUW'd.
|
||||
Metadata *MD = MDNode::getTemporary(Context, None);
|
||||
MDValuePtrs[Idx].reset(MD);
|
||||
return MD;
|
||||
@ -556,11 +564,15 @@ void BitcodeReaderMDValueList::tryToResolveCycles() {
|
||||
return;
|
||||
|
||||
// Resolve any cycles.
|
||||
for (auto &MD : MDValuePtrs) {
|
||||
for (unsigned I = MinFwdRef, E = MaxFwdRef + 1; I != E; ++I) {
|
||||
auto &MD = MDValuePtrs[I];
|
||||
assert(!(MD && isa<MDNodeFwdDecl>(MD)) && "Unexpected forward reference");
|
||||
if (auto *N = dyn_cast_or_null<UniquableMDNode>(MD))
|
||||
N->resolveCycles();
|
||||
}
|
||||
|
||||
// Make sure we return early again until there's another forward ref.
|
||||
AnyFwdRefs = false;
|
||||
}
|
||||
|
||||
Type *BitcodeReader::getTypeByID(unsigned ID) {
|
||||
|
@ -99,6 +99,8 @@ class BitcodeReaderValueList {
|
||||
class BitcodeReaderMDValueList {
|
||||
unsigned NumFwdRefs;
|
||||
bool AnyFwdRefs;
|
||||
unsigned MinFwdRef;
|
||||
unsigned MaxFwdRef;
|
||||
std::vector<TrackingMDRef> MDValuePtrs;
|
||||
|
||||
LLVMContext &Context;
|
||||
|
@ -4842,7 +4842,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
|
||||
|
||||
MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
|
||||
SDValue Mask = MST->getMask();
|
||||
SDValue Data = MST->getData();
|
||||
SDValue Data = MST->getValue();
|
||||
SDLoc DL(N);
|
||||
|
||||
// If the MSTORE data type requires splitting and the mask is provided by a
|
||||
@ -4885,7 +4885,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
|
||||
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
|
||||
Alignment, MST->getAAInfo(), MST->getRanges());
|
||||
|
||||
Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, MMO);
|
||||
Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
|
||||
MST->isTruncatingStore());
|
||||
|
||||
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
|
||||
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
|
||||
@ -4897,7 +4898,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
|
||||
SecondHalfAlignment, MST->getAAInfo(),
|
||||
MST->getRanges());
|
||||
|
||||
Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, MMO);
|
||||
Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
|
||||
MST->isTruncatingStore());
|
||||
|
||||
AddToWorklist(Lo.getNode());
|
||||
AddToWorklist(Hi.getNode());
|
||||
@ -4958,7 +4960,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
|
||||
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
|
||||
Alignment, MLD->getAAInfo(), MLD->getRanges());
|
||||
|
||||
Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, MMO);
|
||||
Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
|
||||
ISD::NON_EXTLOAD);
|
||||
|
||||
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
|
||||
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
|
||||
@ -4969,7 +4972,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
|
||||
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
|
||||
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
|
||||
|
||||
Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, MMO);
|
||||
Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
|
||||
ISD::NON_EXTLOAD);
|
||||
|
||||
AddToWorklist(Lo.getNode());
|
||||
AddToWorklist(Hi.getNode());
|
||||
@ -9482,6 +9486,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
|
||||
unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
|
||||
unsigned NewBW = NextPowerOf2(MSB - ShAmt);
|
||||
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
|
||||
// The narrowing should be profitable, the load/store operation should be
|
||||
// legal (or custom) and the store size should be equal to the NewVT width.
|
||||
while (NewBW < BitWidth &&
|
||||
!(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
|
||||
TLI.isNarrowingProfitable(VT, NewVT))) {
|
||||
|
@ -458,16 +458,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
|
||||
SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
|
||||
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
|
||||
SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0());
|
||||
SDValue ExtMask = PromoteTargetBoolean(N->getMask(), NVT);
|
||||
|
||||
SDValue Mask = N->getMask();
|
||||
EVT NewMaskVT = getSetCCResultType(NVT);
|
||||
if (NewMaskVT != N->getMask().getValueType())
|
||||
Mask = PromoteTargetBoolean(Mask, NewMaskVT);
|
||||
SDLoc dl(N);
|
||||
|
||||
MachineMemOperand *MMO = DAG.getMachineFunction().
|
||||
getMachineMemOperand(N->getPointerInfo(),
|
||||
MachineMemOperand::MOLoad, NVT.getStoreSize(),
|
||||
N->getAlignment(), N->getAAInfo(), N->getRanges());
|
||||
|
||||
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
|
||||
ExtMask, ExtSrc0, MMO);
|
||||
Mask, ExtSrc0, N->getMemoryVT(),
|
||||
N->getMemOperand(), ISD::SEXTLOAD);
|
||||
// Legalized the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
|
||||
@ -1117,16 +1117,18 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
|
||||
SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){
|
||||
|
||||
assert(OpNo == 2 && "Only know how to promote the mask!");
|
||||
SDValue DataOp = N->getData();
|
||||
SDValue DataOp = N->getValue();
|
||||
EVT DataVT = DataOp.getValueType();
|
||||
SDValue Mask = N->getMask();
|
||||
EVT MaskVT = Mask.getValueType();
|
||||
SDLoc dl(N);
|
||||
|
||||
bool TruncateStore = false;
|
||||
if (!TLI.isTypeLegal(DataVT)) {
|
||||
if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) {
|
||||
DataOp = GetPromotedInteger(DataOp);
|
||||
Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
|
||||
TruncateStore = true;
|
||||
}
|
||||
else {
|
||||
assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector &&
|
||||
@ -1156,10 +1158,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpN
|
||||
}
|
||||
else
|
||||
Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType());
|
||||
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
|
||||
NewOps[2] = Mask;
|
||||
NewOps[3] = DataOp;
|
||||
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
|
||||
return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask,
|
||||
N->getMemoryVT(), N->getMemOperand(),
|
||||
TruncateStore);
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){
|
||||
|
@ -659,6 +659,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
|
||||
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
|
||||
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
|
||||
SDValue WidenVecOp_STORE(SDNode* N);
|
||||
SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
|
||||
SDValue WidenVecOp_SETCC(SDNode* N);
|
||||
|
||||
SDValue WidenVecOp_Convert(SDNode *N);
|
||||
|
@ -992,6 +992,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
|
||||
SDValue Ptr = MLD->getBasePtr();
|
||||
SDValue Mask = MLD->getMask();
|
||||
unsigned Alignment = MLD->getOriginalAlignment();
|
||||
ISD::LoadExtType ExtType = MLD->getExtensionType();
|
||||
|
||||
// if Alignment is equal to the vector size,
|
||||
// take the half of it for the second part
|
||||
@ -1015,7 +1016,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
|
||||
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
|
||||
Alignment, MLD->getAAInfo(), MLD->getRanges());
|
||||
|
||||
Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, MMO);
|
||||
Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
|
||||
ExtType);
|
||||
|
||||
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
|
||||
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
|
||||
@ -1026,7 +1028,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
|
||||
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
|
||||
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
|
||||
|
||||
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, MMO);
|
||||
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
|
||||
ExtType);
|
||||
|
||||
|
||||
// Build a factor node to remember that this load is independent of the
|
||||
@ -1464,7 +1467,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
|
||||
SDValue Ch = N->getChain();
|
||||
SDValue Ptr = N->getBasePtr();
|
||||
SDValue Mask = N->getMask();
|
||||
SDValue Data = N->getData();
|
||||
SDValue Data = N->getValue();
|
||||
EVT MemoryVT = N->getMemoryVT();
|
||||
unsigned Alignment = N->getOriginalAlignment();
|
||||
SDLoc DL(N);
|
||||
@ -1489,7 +1492,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
|
||||
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
|
||||
Alignment, N->getAAInfo(), N->getRanges());
|
||||
|
||||
Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, MMO);
|
||||
Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
|
||||
N->isTruncatingStore());
|
||||
|
||||
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
|
||||
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
|
||||
@ -1500,7 +1504,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
|
||||
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
|
||||
SecondHalfAlignment, N->getAAInfo(), N->getRanges());
|
||||
|
||||
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, MMO);
|
||||
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
|
||||
N->isTruncatingStore());
|
||||
|
||||
|
||||
// Build a factor node to remember that this store is independent of the
|
||||
@ -2412,6 +2417,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
|
||||
SDValue Mask = N->getMask();
|
||||
EVT MaskVT = Mask.getValueType();
|
||||
SDValue Src0 = GetWidenedVector(N->getSrc0());
|
||||
ISD::LoadExtType ExtType = N->getExtensionType();
|
||||
SDLoc dl(N);
|
||||
|
||||
if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
|
||||
@ -2434,14 +2440,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
|
||||
Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
|
||||
}
|
||||
|
||||
// Rebuild memory operand because MemoryVT was changed
|
||||
MachineMemOperand *MMO = DAG.getMachineFunction().
|
||||
getMachineMemOperand(N->getPointerInfo(),
|
||||
MachineMemOperand::MOLoad, WidenVT.getStoreSize(),
|
||||
N->getAlignment(), N->getAAInfo(), N->getRanges());
|
||||
|
||||
SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
|
||||
Mask, Src0, MMO);
|
||||
Mask, Src0, N->getMemoryVT(),
|
||||
N->getMemOperand(), ExtType);
|
||||
// Legalized the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
|
||||
@ -2593,6 +2594,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
|
||||
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
|
||||
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
|
||||
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
|
||||
case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
|
||||
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
|
||||
|
||||
case ISD::ANY_EXTEND:
|
||||
@ -2791,6 +2793,42 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
|
||||
return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
|
||||
}
|
||||
|
||||
/// Widen the operands of a masked store. The stored value is replaced by its
/// widened vector, the mask is brought to the same element count, and a new
/// masked store is built from the original chain, base pointer, memory VT and
/// memory operand.
/// \param N    the node to legalize; it is cast to MaskedStoreSDNode.
/// \param OpNo index of the operand being widened; 2 means the mask itself.
/// \returns the replacement masked-store node.
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
|
||||
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
|
||||
SDValue Mask = MST->getMask();
|
||||
EVT MaskVT = Mask.getValueType();
|
||||
SDValue StVal = MST->getValue();
|
||||
// Widen the value
|
||||
SDValue WideVal = GetWidenedVector(StVal);
|
||||
SDLoc dl(N);
|
||||
|
||||
// If the mask is the operand being widened, or its type is itself widened,
// take the already-widened mask.
if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
|
||||
Mask = GetWidenedVector(Mask);
|
||||
else {
|
||||
// The mask should be widened as well
|
||||
EVT BoolVT = getSetCCResultType(WideVal.getValueType());
|
||||
// We can't use ModifyToType() because we should fill the mask with
|
||||
// zeroes
|
||||
unsigned WidenNumElts = BoolVT.getVectorNumElements();
|
||||
unsigned MaskNumElts = MaskVT.getVectorNumElements();
|
||||
|
||||
// Concatenate the original mask with (NumConcat - 1) all-zero mask vectors.
// NOTE(review): the integer division assumes WidenNumElts is a multiple of
// MaskNumElts - confirm.
unsigned NumConcat = WidenNumElts / MaskNumElts;
|
||||
SmallVector<SDValue, 16> Ops(NumConcat);
|
||||
SDValue ZeroVal = DAG.getConstant(0, MaskVT);
|
||||
Ops[0] = Mask;
|
||||
for (unsigned i = 1; i != NumConcat; ++i)
|
||||
Ops[i] = ZeroVal;
|
||||
|
||||
Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
|
||||
}
|
||||
assert(Mask.getValueType().getVectorNumElements() ==
|
||||
WideVal.getValueType().getVectorNumElements() &&
|
||||
"Mask and data vectors should have the same number of elements");
|
||||
// NOTE(review): the truncating flag is hard-coded to false rather than taken
// from MST->isTruncatingStore() - confirm this is intended.
return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
|
||||
Mask, MST->getMemoryVT(), MST->getMemOperand(),
|
||||
false);
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
|
||||
SDValue InOp0 = GetWidenedVector(N->getOperand(0));
|
||||
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
|
||||
|
@ -4924,15 +4924,15 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
|
||||
|
||||
SDValue
|
||||
SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
|
||||
SDValue Ptr, SDValue Mask, SDValue Src0,
|
||||
MachineMemOperand *MMO) {
|
||||
SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT,
|
||||
MachineMemOperand *MMO, ISD::LoadExtType ExtTy) {
|
||||
|
||||
SDVTList VTs = getVTList(VT, MVT::Other);
|
||||
SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
|
||||
FoldingSetNodeID ID;
|
||||
AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
|
||||
ID.AddInteger(VT.getRawBits());
|
||||
ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED,
|
||||
ID.AddInteger(encodeMemSDNodeFlags(ExtTy, ISD::UNINDEXED,
|
||||
MMO->isVolatile(),
|
||||
MMO->isNonTemporal(),
|
||||
MMO->isInvariant()));
|
||||
@ -4944,14 +4944,15 @@ SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
|
||||
}
|
||||
SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(),
|
||||
dl.getDebugLoc(), Ops, 4, VTs,
|
||||
VT, MMO);
|
||||
ExtTy, MemVT, MMO);
|
||||
CSEMap.InsertNode(N, IP);
|
||||
InsertNode(N);
|
||||
return SDValue(N, 0);
|
||||
}
|
||||
|
||||
SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
|
||||
SDValue Ptr, SDValue Mask, MachineMemOperand *MMO) {
|
||||
SDValue Ptr, SDValue Mask, EVT MemVT,
|
||||
MachineMemOperand *MMO, bool isTrunc) {
|
||||
assert(Chain.getValueType() == MVT::Other &&
|
||||
"Invalid chain type");
|
||||
EVT VT = Val.getValueType();
|
||||
@ -4970,7 +4971,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
|
||||
}
|
||||
SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(),
|
||||
dl.getDebugLoc(), Ops, 4,
|
||||
VTs, VT, MMO);
|
||||
VTs, isTrunc, MemVT, MMO);
|
||||
CSEMap.InsertNode(N, IP);
|
||||
InsertNode(N);
|
||||
return SDValue(N, 0);
|
||||
|
@ -3667,7 +3667,8 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
|
||||
getMachineMemOperand(MachinePointerInfo(PtrOperand),
|
||||
MachineMemOperand::MOStore, VT.getStoreSize(),
|
||||
Alignment, AAInfo);
|
||||
SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, MMO);
|
||||
SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
|
||||
MMO, false);
|
||||
DAG.setRoot(StoreNode);
|
||||
setValue(&I, StoreNode);
|
||||
}
|
||||
@ -3706,7 +3707,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
|
||||
MachineMemOperand::MOLoad, VT.getStoreSize(),
|
||||
Alignment, AAInfo, Ranges);
|
||||
|
||||
SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, MMO);
|
||||
SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
|
||||
ISD::NON_EXTLOAD);
|
||||
SDValue OutChain = Load.getValue(1);
|
||||
DAG.setRoot(OutChain);
|
||||
setValue(&I, Load);
|
||||
|
@ -4,7 +4,6 @@ add_llvm_library(LLVMExecutionEngine
|
||||
ExecutionEngine.cpp
|
||||
ExecutionEngineBindings.cpp
|
||||
GDBRegistrationListener.cpp
|
||||
RTDyldMemoryManager.cpp
|
||||
TargetSelect.cpp
|
||||
)
|
||||
|
||||
|
@ -22,4 +22,4 @@ subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT
|
||||
type = Library
|
||||
name = ExecutionEngine
|
||||
parent = Libraries
|
||||
required_libraries = Core MC Object Support
|
||||
required_libraries = Core MC Object Support RuntimeDyld
|
||||
|
@ -1,4 +1,5 @@
|
||||
add_llvm_library(LLVMRuntimeDyld
|
||||
RTDyldMemoryManager.cpp
|
||||
RuntimeDyld.cpp
|
||||
RuntimeDyldChecker.cpp
|
||||
RuntimeDyldELF.cpp
|
||||
|
@ -257,11 +257,11 @@ Constant *Constant::getAggregateElement(unsigned Elt) const {
|
||||
if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
|
||||
return Elt < CV->getNumOperands() ? CV->getOperand(Elt) : nullptr;
|
||||
|
||||
if (const ConstantAggregateZero *CAZ =dyn_cast<ConstantAggregateZero>(this))
|
||||
return CAZ->getElementValue(Elt);
|
||||
if (const ConstantAggregateZero *CAZ = dyn_cast<ConstantAggregateZero>(this))
|
||||
return Elt < CAZ->getNumElements() ? CAZ->getElementValue(Elt) : nullptr;
|
||||
|
||||
if (const UndefValue *UV = dyn_cast<UndefValue>(this))
|
||||
return UV->getElementValue(Elt);
|
||||
return Elt < UV->getNumElements() ? UV->getElementValue(Elt) : nullptr;
|
||||
|
||||
if (const ConstantDataSequential *CDS =dyn_cast<ConstantDataSequential>(this))
|
||||
return Elt < CDS->getNumElements() ? CDS->getElementAsConstant(Elt)
|
||||
@ -764,6 +764,14 @@ Constant *ConstantAggregateZero::getElementValue(unsigned Idx) const {
|
||||
return getStructElement(Idx);
|
||||
}
|
||||
|
||||
/// Return the number of elements of this constant's type: the element count
/// for an array or vector type, otherwise the struct element count.
unsigned ConstantAggregateZero::getNumElements() const {
|
||||
const Type *Ty = getType();
|
||||
if (const auto *AT = dyn_cast<ArrayType>(Ty))
|
||||
return AT->getNumElements();
|
||||
if (const auto *VT = dyn_cast<VectorType>(Ty))
|
||||
return VT->getNumElements();
|
||||
// Neither an array nor a vector: treat the type as a struct.
return Ty->getStructNumElements();
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// UndefValue Implementation
|
||||
@ -797,7 +805,14 @@ UndefValue *UndefValue::getElementValue(unsigned Idx) const {
|
||||
return getStructElement(Idx);
|
||||
}
|
||||
|
||||
|
||||
/// Return the number of elements of this undef value's type: the element count
/// for an array or vector type, otherwise the struct element count.
unsigned UndefValue::getNumElements() const {
|
||||
const Type *Ty = getType();
|
||||
if (const auto *AT = dyn_cast<ArrayType>(Ty))
|
||||
return AT->getNumElements();
|
||||
if (const auto *VT = dyn_cast<VectorType>(Ty))
|
||||
return VT->getNumElements();
|
||||
// Neither an array nor a vector: treat the type as a struct.
return Ty->getStructNumElements();
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ConstantXXX Classes
|
||||
|
@ -1679,7 +1679,9 @@ void X86TargetLowering::resetOperationActions() {
|
||||
setTargetDAGCombine(ISD::FMA);
|
||||
setTargetDAGCombine(ISD::SUB);
|
||||
setTargetDAGCombine(ISD::LOAD);
|
||||
setTargetDAGCombine(ISD::MLOAD);
|
||||
setTargetDAGCombine(ISD::STORE);
|
||||
setTargetDAGCombine(ISD::MSTORE);
|
||||
setTargetDAGCombine(ISD::ZERO_EXTEND);
|
||||
setTargetDAGCombine(ISD::ANY_EXTEND);
|
||||
setTargetDAGCombine(ISD::SIGN_EXTEND);
|
||||
@ -24738,6 +24740,166 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// PerformMLOADCombine - Resolve extending loads.
/// Only sign-extending masked loads are handled; any other extension type
/// returns an empty SDValue. The load is rebuilt as a non-extending masked
/// load of a vector that uses the memory element type and has the same total
/// bit width as the result type, and that value is then extended back to the
/// result type with X86ISD::VSEXT. Subtarget is not referenced in this body.
|
||||
static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const X86Subtarget *Subtarget) {
|
||||
MaskedLoadSDNode *Mld = cast<MaskedLoadSDNode>(N);
|
||||
if (Mld->getExtensionType() != ISD::SEXTLOAD)
|
||||
return SDValue();
|
||||
|
||||
EVT VT = Mld->getValueType(0);
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
unsigned NumElems = VT.getVectorNumElements();
|
||||
EVT LdVT = Mld->getMemoryVT();
|
||||
SDLoc dl(Mld);
|
||||
|
||||
assert(LdVT != VT && "Cannot extend to the same type");
|
||||
unsigned ToSz = VT.getVectorElementType().getSizeInBits();
|
||||
unsigned FromSz = LdVT.getVectorElementType().getSizeInBits();
|
||||
// The From and To element sizes and the element count must all be powers of
// two (their product is a power of two only if each factor is).
|
||||
assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
|
||||
"Unexpected size for extending masked load");
|
||||
|
||||
// Number of memory-sized elements that fit in one result element.
unsigned SizeRatio = ToSz / FromSz;
|
||||
assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits());
|
||||
|
||||
// Create a type on which we perform the shuffle
|
||||
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
|
||||
LdVT.getScalarType(), NumElems*SizeRatio);
|
||||
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
|
||||
|
||||
// Convert Src0 value
|
||||
SDValue WideSrc0 = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mld->getSrc0());
|
||||
if (Mld->getSrc0().getOpcode() != ISD::UNDEF) {
|
||||
// Lane i of the shuffle result takes lane i * SizeRatio of the bitcast
// Src0; every other lane keeps the -1 it was initialized with.
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
|
||||
for (unsigned i = 0; i != NumElems; ++i)
|
||||
ShuffleVec[i] = i * SizeRatio;
|
||||
|
||||
// Can't shuffle using an illegal type.
|
||||
assert (TLI.isTypeLegal(WideVecVT) && "WideVecVT should be legal");
|
||||
WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0,
|
||||
DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
|
||||
}
|
||||
// Prepare the new mask
|
||||
SDValue NewMask;
|
||||
SDValue Mask = Mld->getMask();
|
||||
if (Mask.getValueType() == VT) {
|
||||
// Mask and original value have the same type
|
||||
NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
|
||||
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
|
||||
for (unsigned i = 0; i != NumElems; ++i)
|
||||
ShuffleVec[i] = i * SizeRatio;
|
||||
// Remaining lanes use index NumElems*SizeRatio - presumably the first lane
// of the second shuffle operand (the zero constant); confirm.
for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
|
||||
ShuffleVec[i] = NumElems*SizeRatio;
|
||||
NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
|
||||
DAG.getConstant(0, WideVecVT),
|
||||
&ShuffleVec[0]);
|
||||
}
|
||||
else {
|
||||
// i1-element mask: extend it to WidenNumElts lanes by concatenating
// zero-valued vectors of the original mask type after it.
assert(Mask.getValueType().getVectorElementType() == MVT::i1);
|
||||
unsigned WidenNumElts = NumElems*SizeRatio;
|
||||
unsigned MaskNumElts = VT.getVectorNumElements();
|
||||
EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
|
||||
WidenNumElts);
|
||||
|
||||
unsigned NumConcat = WidenNumElts / MaskNumElts;
|
||||
SmallVector<SDValue, 16> Ops(NumConcat);
|
||||
SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType());
|
||||
Ops[0] = Mask;
|
||||
for (unsigned i = 1; i != NumConcat; ++i)
|
||||
Ops[i] = ZeroVal;
|
||||
|
||||
NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
|
||||
}
|
||||
|
||||
// Non-extending masked load on the wide type, followed by the sign extension
// to VT; CombineTo is given both the extended value and the new load's chain.
SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(),
|
||||
Mld->getBasePtr(), NewMask, WideSrc0,
|
||||
Mld->getMemoryVT(), Mld->getMemOperand(),
|
||||
ISD::NON_EXTLOAD);
|
||||
SDValue NewVec = DAG.getNode(X86ISD::VSEXT, dl, VT, WideLd);
|
||||
return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
|
||||
|
||||
}
|
||||
/// PerformMSTORECombine - Resolve truncating masked stores.
///
/// If \p N is not a truncating masked store, an empty SDValue is returned.
/// Otherwise the stored value is bitcast to a vector of the narrower memory
/// element type, the narrow lanes to keep are shuffled to the front of that
/// vector, the mask is widened to the same number of lanes, and the masked
/// store node built from those pieces (with memory type StVT) is returned.
///
/// \p Subtarget is not referenced in the body of this function.
|
||||
static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
|
||||
const X86Subtarget *Subtarget) {
|
||||
MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
|
||||
// Only truncating masked stores are rewritten here.
if (!Mst->isTruncatingStore())
|
||||
return SDValue();
|
||||
|
||||
// VT is the type of the value being stored; StVT (below) is the narrower
// type that actually reaches memory.
EVT VT = Mst->getValue().getValueType();
|
||||
// NOTE(review): TLI is only read inside an assert below.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
unsigned NumElems = VT.getVectorNumElements();
|
||||
EVT StVT = Mst->getMemoryVT();
|
||||
SDLoc dl(Mst);
|
||||
|
||||
assert(StVT != VT && "Cannot truncate to the same type");
|
||||
// Per-element bit widths before (FromSz) and after (ToSz) truncation.
unsigned FromSz = VT.getVectorElementType().getSizeInBits();
|
||||
unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
|
||||
|
||||
// The From and To element sizes and the element count must all be powers of
// two (their product is a power of two only if each factor is).
|
||||
assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
|
||||
"Unexpected size for truncating masked store");
|
||||
// We are going to use the original vector elt for storing.
|
||||
// Accumulated smaller vector elements must be a multiple of the store size.
|
||||
assert (((NumElems * FromSz) % ToSz) == 0 &&
|
||||
"Unexpected ratio for truncating masked store");
|
||||
|
||||
// Number of narrow (ToSz-bit) lanes that fit in one original element.
unsigned SizeRatio = FromSz / ToSz;
|
||||
assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
|
||||
|
||||
// Create a type on which we perform the shuffle: same total width as VT, but
// made of NumElems*SizeRatio elements of the memory scalar type.
|
||||
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
|
||||
StVT.getScalarType(), NumElems*SizeRatio);
|
||||
|
||||
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
|
||||
|
||||
SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mst->getValue());
|
||||
// Shuffle mask: result lane i takes narrow lane i*SizeRatio, i.e. the first
// narrow lane of original element i (presumably its low bits, given
// little-endian x86 -- TODO confirm). All remaining lanes stay at -1.
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
|
||||
for (unsigned i = 0; i != NumElems; ++i)
|
||||
ShuffleVec[i] = i * SizeRatio;
|
||||
|
||||
// Can't shuffle using an illegal type.
|
||||
assert (TLI.isTypeLegal(WideVecVT) && "WideVecVT should be legal");
|
||||
|
||||
// Pack the kept narrow lanes into the first NumElems lanes of the wide vector.
SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
|
||||
DAG.getUNDEF(WideVecVT),
|
||||
&ShuffleVec[0]);
|
||||
|
||||
// Prepare the new mask; it must cover NumElems*SizeRatio lanes.
SDValue NewMask;
|
||||
SDValue Mask = Mst->getMask();
|
||||
if (Mask.getValueType() == VT) {
|
||||
// Mask and original value have the same type: apply the same
// bitcast-and-shuffle to the mask that was applied to the value.
|
||||
NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
|
||||
// ShuffleVec is reused from above; its first NumElems entries already hold
// these values, so this loop rewrites them unchanged.
for (unsigned i = 0; i != NumElems; ++i)
|
||||
ShuffleVec[i] = i * SizeRatio;
|
||||
// Point every remaining lane at index NumElems*SizeRatio, which by the
// shuffle-mask convention should be lane 0 of the second operand (the
// all-zero constant passed below), so those lanes end up masked off.
for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
|
||||
ShuffleVec[i] = NumElems*SizeRatio;
|
||||
NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
|
||||
DAG.getConstant(0, WideVecVT),
|
||||
&ShuffleVec[0]);
|
||||
}
|
||||
else {
|
||||
// Otherwise the mask is expected to be a vector of i1; widen it by
// concatenating zero vectors after it.
assert(Mask.getValueType().getVectorElementType() == MVT::i1);
|
||||
unsigned WidenNumElts = NumElems*SizeRatio;
|
||||
unsigned MaskNumElts = VT.getVectorNumElements();
|
||||
EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
|
||||
WidenNumElts);
|
||||
|
||||
// Number of mask-sized pieces needed to fill the widened mask.
unsigned NumConcat = WidenNumElts / MaskNumElts;
|
||||
SmallVector<SDValue, 16> Ops(NumConcat);
|
||||
SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType());
|
||||
// The original mask comes first; every following piece is all zeros.
Ops[0] = Mask;
|
||||
for (unsigned i = 1; i != NumConcat; ++i)
|
||||
Ops[i] = ZeroVal;
|
||||
|
||||
NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
|
||||
}
|
||||
|
||||
// Build the replacement masked store of the packed value under the widened
// mask. NOTE(review): the trailing `false` is presumably the "is truncating"
// flag -- confirm against the getMaskedStore declaration.
return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal, Mst->getBasePtr(),
|
||||
NewMask, StVT, Mst->getMemOperand(), false);
|
||||
}
|
||||
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
|
||||
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
|
||||
const X86Subtarget *Subtarget) {
|
||||
@ -25836,7 +25998,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::MLOAD: return PerformMLOADCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
|
||||
case ISD::MSTORE: return PerformMSTORECombine(N, DAG, Subtarget);
|
||||
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
|
||||
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
|
||||
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
|
||||
|
@ -403,7 +403,7 @@ bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction& Start,
|
||||
const Instruction& End,
|
||||
AliasAnalysis::Location
|
||||
Loc) {
|
||||
return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::Ref);
|
||||
return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::ModRef);
|
||||
}
|
||||
|
||||
///
|
||||
@ -414,6 +414,7 @@ bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction& Start,
|
||||
StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
|
||||
StoreInst *Store0) {
|
||||
DEBUG(dbgs() << "can Sink? : "; Store0->dump(); dbgs() << "\n");
|
||||
BasicBlock *BB0 = Store0->getParent();
|
||||
for (BasicBlock::reverse_iterator RBI = BB1->rbegin(), RBE = BB1->rend();
|
||||
RBI != RBE; ++RBI) {
|
||||
Instruction *Inst = &*RBI;
|
||||
@ -422,13 +423,14 @@ StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
|
||||
continue;
|
||||
|
||||
StoreInst *Store1 = cast<StoreInst>(Inst);
|
||||
BasicBlock *BB0 = Store0->getParent();
|
||||
|
||||
AliasAnalysis::Location Loc0 = AA->getLocation(Store0);
|
||||
AliasAnalysis::Location Loc1 = AA->getLocation(Store1);
|
||||
if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
|
||||
!isStoreSinkBarrierInRange(*Store1, BB1->back(), Loc1) &&
|
||||
!isStoreSinkBarrierInRange(*Store0, BB0->back(), Loc0)) {
|
||||
!isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store1))),
|
||||
BB1->back(), Loc1) &&
|
||||
!isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store0))),
|
||||
BB0->back(), Loc0)) {
|
||||
return Store1;
|
||||
}
|
||||
}
|
||||
|
@ -55,7 +55,7 @@ STATISTIC(NumRuntimeUnrolled,
|
||||
/// - Branch around the original loop if the trip count is less
|
||||
/// than the unroll factor.
|
||||
///
|
||||
static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
|
||||
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
|
||||
BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
|
||||
BasicBlock *OrigPH, BasicBlock *NewPH,
|
||||
ValueToValueMapTy &VMap, Pass *P) {
|
||||
@ -105,12 +105,19 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
|
||||
}
|
||||
}
|
||||
|
||||
// Create a branch around the original loop, which is taken if the
|
||||
// trip count is less than the unroll factor.
|
||||
// Create a branch around the original loop, which is taken if there are no
|
||||
// iterations remaining to be executed after running the prologue.
|
||||
Instruction *InsertPt = PrologEnd->getTerminator();
|
||||
|
||||
assert(Count != 0 && "nonsensical Count!");
|
||||
|
||||
// If BECount <u (Count - 1) then (BECount + 1) & (Count - 1) == (BECount + 1)
|
||||
// (since Count is a power of 2). This means %xtraiter is (BECount + 1) and
|
||||
// all of the iterations of this loop were executed by the prologue. Note
|
||||
// that if BECount <u (Count - 1) then (BECount + 1) cannot unsigned-overflow.
|
||||
Instruction *BrLoopExit =
|
||||
new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
|
||||
ConstantInt::get(TripCount->getType(), Count));
|
||||
new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, BECount,
|
||||
ConstantInt::get(BECount->getType(), Count - 1));
|
||||
BasicBlock *Exit = L->getUniqueExitBlock();
|
||||
assert(Exit && "Loop must have a single exit block only");
|
||||
// Split the exit to maintain loop canonicalization guarantees
|
||||
@ -292,23 +299,28 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
|
||||
|
||||
// Only unroll loops with a computable trip count and the trip count needs
|
||||
// to be an int value (allowing a pointer type is a TODO item)
|
||||
const SCEV *BECount = SE->getBackedgeTakenCount(L);
|
||||
if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
|
||||
const SCEV *BECountSC = SE->getBackedgeTakenCount(L);
|
||||
if (isa<SCEVCouldNotCompute>(BECountSC) ||
|
||||
!BECountSC->getType()->isIntegerTy())
|
||||
return false;
|
||||
|
||||
// If BECount is INT_MAX, we can't compute trip-count without overflow.
|
||||
if (BECount->isAllOnesValue())
|
||||
return false;
|
||||
unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
|
||||
|
||||
// Add 1 since the backedge count doesn't include the first loop iteration
|
||||
const SCEV *TripCountSC =
|
||||
SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
|
||||
SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
|
||||
if (isa<SCEVCouldNotCompute>(TripCountSC))
|
||||
return false;
|
||||
|
||||
// We only handle cases when the unroll factor is a power of 2.
|
||||
// Count is the loop unroll factor, the number of extra copies added + 1.
|
||||
if ((Count & (Count-1)) != 0)
|
||||
if (!isPowerOf2_32(Count))
|
||||
return false;
|
||||
|
||||
// This constraint lets us deal with an overflowing trip count easily; see the
|
||||
// comment on ModVal below. This check guarantees that `Log2(Count) <=
|
||||
// BEWidth`.
|
||||
if (static_cast<uint64_t>(Count) > (1ULL << BEWidth))
|
||||
return false;
|
||||
|
||||
// If this loop is nested, then the loop unroller changes the code in
|
||||
@ -330,16 +342,23 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
|
||||
SCEVExpander Expander(*SE, "loop-unroll");
|
||||
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
|
||||
PreHeaderBR);
|
||||
Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
|
||||
PreHeaderBR);
|
||||
|
||||
IRBuilder<> B(PreHeaderBR);
|
||||
Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
|
||||
|
||||
// Check if for no extra iterations, then jump to cloned/unrolled loop.
|
||||
// We have to check that the trip count computation didn't overflow when
|
||||
// adding one to the backedge taken count.
|
||||
Value *LCmp = B.CreateIsNotNull(ModVal, "lcmp.mod");
|
||||
Value *OverflowCheck = B.CreateIsNull(TripCount, "lcmp.overflow");
|
||||
Value *BranchVal = B.CreateOr(OverflowCheck, LCmp, "lcmp.or");
|
||||
// If ModVal is zero, we know that either
|
||||
// 1. there are no iterations to be run in the prologue loop
|
||||
// OR
|
||||
// 2. the addition computing TripCount overflowed
|
||||
//
|
||||
// If (2) is true, we know that TripCount really is (1 << BEWidth) and so the
|
||||
// number of iterations that remain to be run in the original loop is a
|
||||
// multiple of Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
|
||||
// explicitly check this above).
|
||||
|
||||
Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");
|
||||
|
||||
// Branch to either the extra iterations or the cloned/unrolled loop
|
||||
// We will fix up the true branch label when adding loop body copies
|
||||
@ -362,10 +381,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
|
||||
std::vector<BasicBlock *> NewBlocks;
|
||||
ValueToValueMapTy VMap;
|
||||
|
||||
// If unroll count is 2 and we can't overflow in tripcount computation (which
|
||||
// is BECount + 1), then we don't need a loop for prologue, and we can unroll
|
||||
// it. We can be sure that we don't overflow only if tripcount is a constant.
|
||||
bool UnrollPrologue = (Count == 2 && isa<ConstantInt>(TripCount));
|
||||
bool UnrollPrologue = Count == 2;
|
||||
|
||||
// Clone all the basic blocks in the loop. If Count is 2, we don't clone
|
||||
// the loop, otherwise we create a cloned loop to execute the extra
|
||||
@ -391,7 +407,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
|
||||
// Connect the prolog code to the original loop and update the
|
||||
// PHI functions.
|
||||
BasicBlock *LastLoopBB = cast<BasicBlock>(VMap[Latch]);
|
||||
ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, VMap,
|
||||
ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap,
|
||||
LPM->getAsPass());
|
||||
NumRuntimeUnrolled++;
|
||||
return true;
|
||||
|
@ -1874,6 +1874,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
|
||||
// wide store needs to start at the last vector element.
|
||||
PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
|
||||
PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
|
||||
Mask[Part] = reverseVector(Mask[Part]);
|
||||
}
|
||||
|
||||
Value *VecPtr = Builder.CreateBitCast(PartPtr,
|
||||
@ -1902,6 +1903,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
|
||||
// wide load needs to start at the last vector element.
|
||||
PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
|
||||
PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
|
||||
Mask[Part] = reverseVector(Mask[Part]);
|
||||
}
|
||||
|
||||
Instruction* NewLI;
|
||||
|
@ -159,7 +159,7 @@ define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
|
||||
}
|
||||
|
||||
; AVX2-LABEL: test15
|
||||
; AVX2: vpmaskmovq
|
||||
; AVX2: vpmaskmovd
|
||||
define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
|
||||
@ -176,8 +176,9 @@ define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %
|
||||
}
|
||||
|
||||
; AVX2-LABEL: test17
|
||||
; AVX2: vpmaskmovq
|
||||
; AVX2: vblendvpd
|
||||
; AVX2: vpmaskmovd
|
||||
; AVX2: vblendvps
|
||||
; AVX2: vpmovsxdq
|
||||
define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
%res = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
|
||||
|
114
test/Transforms/InstMerge/st_sink_bugfix_22613.ll
Normal file
114
test/Transforms/InstMerge/st_sink_bugfix_22613.ll
Normal file
@ -0,0 +1,114 @@
|
||||
; ModuleID = 'bug.c'
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; RUN: opt -O2 -S < %s | FileCheck %s
|
||||
|
||||
; CHECK_LABEL: main
|
||||
; CHECK: if.end
|
||||
; CHECK: store
|
||||
; CHECK: memset
|
||||
; CHECK: if.then
|
||||
; CHECK: store
|
||||
; CHECK: memset
|
||||
|
||||
@d = common global i32 0, align 4
|
||||
@b = common global i32 0, align 4
|
||||
@f = common global [1 x [3 x i8]] zeroinitializer, align 1
|
||||
@e = common global i32 0, align 4
|
||||
@c = common global i32 0, align 4
|
||||
@a = common global i32 0, align 4
|
||||
|
||||
; Function Attrs: nounwind uwtable
|
||||
define void @fn1() #0 {
|
||||
entry:
|
||||
store i32 0, i32* @d, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc8, %entry
|
||||
%0 = load i32* @d, align 4
|
||||
%cmp = icmp slt i32 %0, 2
|
||||
br i1 %cmp, label %for.body, label %for.end10
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%1 = load i32* @d, align 4
|
||||
%idxprom = sext i32 %1 to i64
|
||||
%2 = load i32* @b, align 4
|
||||
%idxprom1 = sext i32 %2 to i64
|
||||
%arrayidx = getelementptr inbounds [1 x [3 x i8]]* @f, i32 0, i64 %idxprom1
|
||||
%arrayidx2 = getelementptr inbounds [3 x i8]* %arrayidx, i32 0, i64 %idxprom
|
||||
store i8 0, i8* %arrayidx2, align 1
|
||||
store i32 0, i32* @e, align 4
|
||||
br label %for.cond3
|
||||
|
||||
for.cond3: ; preds = %for.inc, %for.body
|
||||
%3 = load i32* @e, align 4
|
||||
%cmp4 = icmp slt i32 %3, 3
|
||||
br i1 %cmp4, label %for.body5, label %for.end
|
||||
|
||||
for.body5: ; preds = %for.cond3
|
||||
%4 = load i32* @c, align 4
|
||||
%tobool = icmp ne i32 %4, 0
|
||||
br i1 %tobool, label %if.then, label %if.end
|
||||
|
||||
if.then: ; preds = %for.body5
|
||||
%5 = load i32* @a, align 4
|
||||
%dec = add nsw i32 %5, -1
|
||||
store i32 %dec, i32* @a, align 4
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.then, %for.body5
|
||||
%6 = load i32* @e, align 4
|
||||
%idxprom6 = sext i32 %6 to i64
|
||||
%arrayidx7 = getelementptr inbounds [3 x i8]* getelementptr inbounds ([1 x [3 x i8]]* @f, i32 0, i64 0), i32 0, i64 %idxprom6
|
||||
store i8 1, i8* %arrayidx7, align 1
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %if.end
|
||||
%7 = load i32* @e, align 4
|
||||
%inc = add nsw i32 %7, 1
|
||||
store i32 %inc, i32* @e, align 4
|
||||
br label %for.cond3
|
||||
|
||||
for.end: ; preds = %for.cond3
|
||||
br label %for.inc8
|
||||
|
||||
for.inc8: ; preds = %for.end
|
||||
%8 = load i32* @d, align 4
|
||||
%inc9 = add nsw i32 %8, 1
|
||||
store i32 %inc9, i32* @d, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.end10: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind uwtable
|
||||
define i32 @main() #0 {
|
||||
entry:
|
||||
%retval = alloca i32, align 4
|
||||
store i32 0, i32* %retval
|
||||
call void @fn1()
|
||||
%0 = load i8* getelementptr inbounds ([1 x [3 x i8]]* @f, i32 0, i64 0, i64 1), align 1
|
||||
%conv = sext i8 %0 to i32
|
||||
%cmp = icmp ne i32 %conv, 1
|
||||
br i1 %cmp, label %if.then, label %if.end
|
||||
|
||||
if.then: ; preds = %entry
|
||||
call void @abort() #2
|
||||
unreachable
|
||||
|
||||
if.end: ; preds = %entry
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; Function Attrs: noreturn nounwind
|
||||
declare void @abort() #1
|
||||
|
||||
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { noreturn nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #2 = { noreturn nounwind }
|
||||
|
||||
!llvm.ident = !{!0}
|
||||
|
||||
!0 = !{!"clang version 3.7.0 (trunk 229288) (llvm/trunk 229286:229290M)"}
|
19
test/Transforms/InstSimplify/load.ll
Normal file
19
test/Transforms/InstSimplify/load.ll
Normal file
@ -0,0 +1,19 @@
|
||||
; RUN: opt < %s -instsimplify -S | FileCheck %s
|
||||
|
||||
@zeroinit = constant {} zeroinitializer
|
||||
@undef = constant {} undef
|
||||
|
||||
define i32 @crash_on_zeroinit() {
|
||||
; CHECK-LABEL: @crash_on_zeroinit
|
||||
; CHECK: ret i32 0
|
||||
%load = load i32* bitcast ({}* @zeroinit to i32*)
|
||||
ret i32 %load
|
||||
}
|
||||
|
||||
define i32 @crash_on_undef() {
|
||||
; CHECK-LABEL: @crash_on_undef
|
||||
; CHECK: ret i32 undef
|
||||
%load = load i32* bitcast ({}* @undef to i32*)
|
||||
ret i32 %load
|
||||
}
|
||||
|
@ -4,9 +4,7 @@
|
||||
|
||||
; CHECK: %xtraiter = and i32 %n
|
||||
; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0
|
||||
; CHECK: %lcmp.overflow = icmp eq i32 %n, 0
|
||||
; CHECK: %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod
|
||||
; CHECK: br i1 %lcmp.or, label %for.body.prol, label %for.body.preheader.split
|
||||
; CHECK: br i1 %lcmp.mod, label %for.body.prol, label %for.body.preheader.split
|
||||
|
||||
; CHECK: for.body.prol:
|
||||
; CHECK: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.preheader ]
|
||||
|
@ -3,7 +3,7 @@
|
||||
; This tests that setting the unroll count works
|
||||
|
||||
; CHECK: for.body.prol:
|
||||
; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split
|
||||
; CHECK: br label %for.body.preheader.split
|
||||
; CHECK: for.body:
|
||||
; CHECK: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body
|
||||
; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
|
||||
|
@ -1,19 +1,28 @@
|
||||
; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
; When prologue is fully unrolled, the branch on its end is unconditional.
|
||||
; Unrolling it is illegal if we can't prove that trip-count+1 doesn't overflow,
|
||||
; like in this example, where it comes from an argument.
|
||||
;
|
||||
; This test is based on an example from here:
|
||||
; http://stackoverflow.com/questions/23838661/why-is-clang-optimizing-this-code-out
|
||||
;
|
||||
; This test case documents how runtime loop unrolling handles the case
|
||||
; when the backedge-count is -1.
|
||||
|
||||
; If %N, the backedge-taken count, is -1 then %0 unsigned-overflows
|
||||
; and is 0. %xtraiter too is 0, signifying that the total trip-count
|
||||
; is divisible by 2. The prologue then branches to the unrolled loop
|
||||
; and executes the 2^32 iterations there, in groups of 2.
|
||||
|
||||
|
||||
; CHECK: entry:
|
||||
; CHECK-NEXT: %0 = add i32 %N, 1
|
||||
; CHECK-NEXT: %xtraiter = and i32 %0, 1
|
||||
; CHECK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0
|
||||
; CHECK-NEXT: br i1 %lcmp.mod, label %while.body.prol, label %entry.split
|
||||
|
||||
; CHECK: while.body.prol:
|
||||
; CHECK: br i1
|
||||
; CHECK: br label %entry.split
|
||||
|
||||
; CHECK: entry.split:
|
||||
|
||||
; Function Attrs: nounwind readnone ssp uwtable
|
||||
define i32 @foo(i32 %N) #0 {
|
||||
define i32 @foo(i32 %N) {
|
||||
entry:
|
||||
br label %while.body
|
||||
|
||||
@ -26,5 +35,3 @@ while.body: ; preds = %while.body, %entry
|
||||
while.end: ; preds = %while.body
|
||||
ret i32 %i
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
@ -418,3 +418,85 @@ for.end: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
||||
|
||||
; Reverse loop
|
||||
;void foo6(double *in, double *out, unsigned size, int *trigger) {
|
||||
;
|
||||
; for (int i=SIZE-1; i>=0; i--) {
|
||||
; if (trigger[i] > 0) {
|
||||
; out[i] = in[i] + (double) 0.5;
|
||||
; }
|
||||
; }
|
||||
;}
|
||||
;AVX2-LABEL: @foo6
|
||||
;AVX2: icmp sgt <4 x i32> %reverse, zeroinitializer
|
||||
;AVX2: shufflevector <4 x i1>{{.*}}<4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
||||
;AVX2: call <4 x double> @llvm.masked.load.v4f64
|
||||
;AVX2: fadd <4 x double>
|
||||
;AVX2: call void @llvm.masked.store.v4f64
|
||||
;AVX2: ret void
|
||||
|
||||
;AVX512-LABEL: @foo6
|
||||
;AVX512: icmp sgt <8 x i32> %reverse, zeroinitializer
|
||||
;AVX512: shufflevector <8 x i1>{{.*}}<8 x i32> <i32 7, i32 6, i32 5, i32 4
|
||||
;AVX512: call <8 x double> @llvm.masked.load.v8f64
|
||||
;AVX512: fadd <8 x double>
|
||||
;AVX512: call void @llvm.masked.store.v8f64
|
||||
;AVX512: ret void
|
||||
|
||||
|
||||
define void @foo6(double* %in, double* %out, i32 %size, i32* %trigger) {
|
||||
entry:
|
||||
%in.addr = alloca double*, align 8
|
||||
%out.addr = alloca double*, align 8
|
||||
%size.addr = alloca i32, align 4
|
||||
%trigger.addr = alloca i32*, align 8
|
||||
%i = alloca i32, align 4
|
||||
store double* %in, double** %in.addr, align 8
|
||||
store double* %out, double** %out.addr, align 8
|
||||
store i32 %size, i32* %size.addr, align 4
|
||||
store i32* %trigger, i32** %trigger.addr, align 8
|
||||
store i32 4095, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%0 = load i32* %i, align 4
|
||||
%cmp = icmp sge i32 %0, 0
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%1 = load i32* %i, align 4
|
||||
%idxprom = sext i32 %1 to i64
|
||||
%2 = load i32** %trigger.addr, align 8
|
||||
%arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
|
||||
%3 = load i32* %arrayidx, align 4
|
||||
%cmp1 = icmp sgt i32 %3, 0
|
||||
br i1 %cmp1, label %if.then, label %if.end
|
||||
|
||||
if.then: ; preds = %for.body
|
||||
%4 = load i32* %i, align 4
|
||||
%idxprom2 = sext i32 %4 to i64
|
||||
%5 = load double** %in.addr, align 8
|
||||
%arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2
|
||||
%6 = load double* %arrayidx3, align 8
|
||||
%add = fadd double %6, 5.000000e-01
|
||||
%7 = load i32* %i, align 4
|
||||
%idxprom4 = sext i32 %7 to i64
|
||||
%8 = load double** %out.addr, align 8
|
||||
%arrayidx5 = getelementptr inbounds double* %8, i64 %idxprom4
|
||||
store double %add, double* %arrayidx5, align 8
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.then, %for.body
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %if.end
|
||||
%9 = load i32* %i, align 4
|
||||
%dec = add nsw i32 %9, -1
|
||||
store i32 %dec, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
|
@ -10,6 +10,7 @@ set(LLVM_LINK_COMPONENTS
|
||||
MC
|
||||
MCJIT
|
||||
Object
|
||||
RuntimeDyld
|
||||
SelectionDAG
|
||||
Support
|
||||
native
|
||||
|
@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS
|
||||
ExecutionEngine
|
||||
Interpreter
|
||||
MC
|
||||
RuntimeDyld
|
||||
Support
|
||||
)
|
||||
|
||||
|
@ -5,6 +5,7 @@ set(LLVM_LINK_COMPONENTS
|
||||
IPO
|
||||
MC
|
||||
MCJIT
|
||||
RuntimeDyld
|
||||
ScalarOpts
|
||||
Support
|
||||
Target
|
||||
|
Loading…
Reference in New Issue
Block a user