Vendor import of llvm RELEASE_360/rc3 tag r229040 (effectively, 3.6.0 RC3):

https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_360/rc3@229040
Author: Dimitry Andric
Date:   2015-02-14 12:17:42 +00:00
Parent: ec304151b7
Commit: 608e665946

100 changed files with 1115 additions and 1533 deletions


@ -334,11 +334,6 @@ function(llvm_add_library name)
PREFIX ""
)
endif()
if (MSVC)
set_target_properties(${name}
PROPERTIES
IMPORT_SUFFIX ".imp")
endif ()
endif()
if(ARG_MODULE OR ARG_SHARED)


@ -391,7 +391,8 @@ During this release the MIPS target has reached a few major milestones. The
compiler has gained support for MIPS-II and MIPS-III; become ABI-compatible
with GCC for big and little endian O32, N32, and N64; and is now able to
compile the Linux kernel for 32-bit targets. Additionally, LLD now supports
microMIPS for the O32 ABI on little endian targets.
microMIPS for the O32 ABI on little endian targets, and code generation for
microMIPS is almost completely passing the test-suite.
ABI
^^^


@ -66,7 +66,6 @@ struct LandingPadInfo {
MachineBasicBlock *LandingPadBlock; // Landing pad block.
SmallVector<MCSymbol*, 1> BeginLabels; // Labels prior to invoke.
SmallVector<MCSymbol*, 1> EndLabels; // Labels after invoke.
SmallVector<MCSymbol*, 1> ClauseLabels; // Labels for each clause.
MCSymbol *LandingPadLabel; // Label at beginning of landing pad.
const Function *Personality; // Personality function.
std::vector<int> TypeIds; // List of type ids (filters negative)
@ -331,11 +330,6 @@ public:
///
void addCleanup(MachineBasicBlock *LandingPad);
/// Add a clause for a landing pad. Returns a new label for the clause. This
/// is used by EH schemes that have more than one landing pad. In this case,
/// each clause gets its own basic block.
MCSymbol *addClauseForLandingPad(MachineBasicBlock *LandingPad);
/// getTypeIDFor - Return the type id for the specified typeinfo. This is
/// function wide.
unsigned getTypeIDFor(const GlobalValue *TI);


@ -248,7 +248,7 @@ public:
void setReductionState(ReductionState RS) { this->RS = RS; }
void handleAddEdge(const MatrixMetadata& MD, bool Transpose) {
DeniedOpts += Transpose ? MD.getWorstCol() : MD.getWorstRow();
DeniedOpts += Transpose ? MD.getWorstRow() : MD.getWorstCol();
const bool* UnsafeOpts =
Transpose ? MD.getUnsafeCols() : MD.getUnsafeRows();
for (unsigned i = 0; i < NumOpts; ++i)
@ -256,7 +256,7 @@ public:
}
void handleRemoveEdge(const MatrixMetadata& MD, bool Transpose) {
DeniedOpts -= Transpose ? MD.getWorstCol() : MD.getWorstRow();
DeniedOpts -= Transpose ? MD.getWorstRow() : MD.getWorstCol();
const bool* UnsafeOpts =
Transpose ? MD.getUnsafeCols() : MD.getUnsafeRows();
for (unsigned i = 0; i < NumOpts; ++i)


@ -693,6 +693,7 @@ public:
static AAMDNodes getMostGenericAA(const AAMDNodes &A, const AAMDNodes &B);
static MDNode *getMostGenericFPMath(MDNode *A, MDNode *B);
static MDNode *getMostGenericRange(MDNode *A, MDNode *B);
static MDNode *getMostGenericAliasScope(MDNode *A, MDNode *B);
};
/// \brief Uniquable metadata node.


@ -52,7 +52,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
const TargetTransformInfo &TTI;
/// The cache of @llvm.assume intrinsics.
AssumptionCache &AC;
AssumptionCacheTracker *ACT;
// The called function.
Function &F;
@ -146,8 +146,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
public:
CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI,
AssumptionCache &AC, Function &Callee, int Threshold)
: DL(DL), TTI(TTI), AC(AC), F(Callee), Threshold(Threshold), Cost(0),
AssumptionCacheTracker *ACT, Function &Callee, int Threshold)
: DL(DL), TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
@ -783,7 +783,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// during devirtualization and so we want to give it a hefty bonus for
// inlining, but cap that bonus in the event that inlining wouldn't pan
// out. Pretend to inline the function, with a custom threshold.
CallAnalyzer CA(DL, TTI, AC, *F, InlineConstants::IndirectCallThreshold);
CallAnalyzer CA(DL, TTI, ACT, *F, InlineConstants::IndirectCallThreshold);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// bonus we want to apply, but don't go below zero.
@ -1110,7 +1110,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// the ephemeral values multiple times (and they're completely determined by
// the callee, so this is purely duplicate work).
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(&F, &AC, EphValues);
CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F), EphValues);
// The worklist of live basic blocks in the callee *after* inlining. We avoid
// adding basic blocks of the callee which can be proven to be dead for this
@ -1310,7 +1310,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
<< "...\n");
CallAnalyzer CA(Callee->getDataLayout(), *TTI,
ACT->getAssumptionCache(*Callee), *Callee, Threshold);
ACT, *Callee, Threshold);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());


@ -623,8 +623,8 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
N.TBAA = getMetadata(LLVMContext::MD_tbaa);
if (Merge)
N.Scope =
MDNode::intersect(N.Scope, getMetadata(LLVMContext::MD_alias_scope));
N.Scope = MDNode::getMostGenericAliasScope(
N.Scope, getMetadata(LLVMContext::MD_alias_scope));
else
N.Scope = getMetadata(LLVMContext::MD_alias_scope);


@ -9,9 +9,11 @@
#include "llvm-c/BitReader.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <string>
@ -30,11 +32,20 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
LLVMMemoryBufferRef MemBuf,
LLVMModuleRef *OutModule,
char **OutMessage) {
ErrorOr<Module *> ModuleOrErr =
parseBitcodeFile(unwrap(MemBuf)->getMemBufferRef(), *unwrap(ContextRef));
if (std::error_code EC = ModuleOrErr.getError()) {
if (OutMessage)
*OutMessage = strdup(EC.message().c_str());
MemoryBufferRef Buf = unwrap(MemBuf)->getMemBufferRef();
LLVMContext &Ctx = *unwrap(ContextRef);
std::string Message;
raw_string_ostream Stream(Message);
DiagnosticPrinterRawOStream DP(Stream);
ErrorOr<Module *> ModuleOrErr = parseBitcodeFile(
Buf, Ctx, [&](const DiagnosticInfo &DI) { DI.print(DP); });
if (ModuleOrErr.getError()) {
if (OutMessage) {
Stream.flush();
*OutMessage = strdup(Message.c_str());
}
*OutModule = wrap((Module*)nullptr);
return 1;
}


@ -121,8 +121,7 @@ computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) {
int TypeID = TypeIds[J];
assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
int ValueForTypeID =
isFilterEHSelector(TypeID) ? FilterOffsets[-1 - TypeID] : TypeID;
int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
unsigned SizeTypeID = getSLEB128Size(ValueForTypeID);
int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
@ -270,14 +269,14 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
CallSiteEntry Site = {
BeginLabel,
LastLabel,
LandingPad,
LandingPad->LandingPadLabel,
FirstActions[P.PadIndex]
};
// Try to merge with the previous call-site. SJLJ doesn't do this
if (PreviousIsInvoke && !IsSJLJ) {
CallSiteEntry &Prev = CallSites.back();
if (Site.LPad == Prev.LPad && Site.Action == Prev.Action) {
if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
// Extend the range of the previous entry.
Prev.EndLabel = Site.EndLabel;
continue;
@ -577,15 +576,15 @@ void EHStreamer::emitExceptionTable() {
// Offset of the landing pad, counted in 16-byte bundles relative to the
// @LPStart address.
if (!S.LPad) {
if (!S.PadLabel) {
if (VerboseAsm)
Asm->OutStreamer.AddComment(" has no landing pad");
Asm->OutStreamer.EmitIntValue(0, 4/*size*/);
} else {
if (VerboseAsm)
Asm->OutStreamer.AddComment(Twine(" jumps to ") +
S.LPad->LandingPadLabel->getName());
Asm->EmitLabelDifference(S.LPad->LandingPadLabel, EHFuncBeginSym, 4);
S.PadLabel->getName());
Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4);
}
// Offset of the first associated action record, relative to the start of
@ -682,7 +681,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
unsigned TypeID = *I;
if (VerboseAsm) {
--Entry;
if (isFilterEHSelector(TypeID))
if (TypeID != 0)
Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry));
}


@ -23,8 +23,6 @@ class MachineModuleInfo;
class MachineInstr;
class MachineFunction;
class AsmPrinter;
class MCSymbol;
class MCSymbolRefExpr;
template <typename T>
class SmallVectorImpl;
@ -62,11 +60,11 @@ protected:
/// Structure describing an entry in the call-site table.
struct CallSiteEntry {
// The 'try-range' is BeginLabel .. EndLabel.
MCSymbol *BeginLabel; // Null indicates the start of the function.
MCSymbol *EndLabel; // Null indicates the end of the function.
MCSymbol *BeginLabel; // zero indicates the start of the function.
MCSymbol *EndLabel; // zero indicates the end of the function.
// LPad contains the landing pad start labels.
const LandingPadInfo *LPad; // Null indicates that there is no landing pad.
// The landing pad starts at PadLabel.
MCSymbol *PadLabel; // zero indicates that there is no landing pad.
unsigned Action;
};
@ -114,13 +112,6 @@ protected:
virtual void emitTypeInfos(unsigned TTypeEncoding);
// Helpers for for identifying what kind of clause an EH typeid or selector
// corresponds to. Negative selectors are for filter clauses, the zero
// selector is for cleanups, and positive selectors are for catch clauses.
static bool isFilterEHSelector(int Selector) { return Selector < 0; }
static bool isCleanupEHSelector(int Selector) { return Selector == 0; }
static bool isCatchEHSelector(int Selector) { return Selector > 0; }
public:
EHStreamer(AsmPrinter *A);
virtual ~EHStreamer();


@ -99,156 +99,9 @@ void Win64Exception::endFunction(const MachineFunction *) {
if (shouldEmitPersonality) {
Asm->OutStreamer.PushSection();
// Emit an UNWIND_INFO struct describing the prologue.
Asm->OutStreamer.EmitWinEHHandlerData();
// Emit either MSVC-compatible tables or the usual Itanium-style LSDA after
// the UNWIND_INFO struct.
if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::MSVC) {
const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
if (Per->getName() == "__C_specific_handler")
emitCSpecificHandlerTable();
else
report_fatal_error(Twine("unexpected personality function: ") +
Per->getName());
} else {
emitExceptionTable();
}
emitExceptionTable();
Asm->OutStreamer.PopSection();
}
Asm->OutStreamer.EmitWinCFIEndProc();
}
const MCSymbolRefExpr *Win64Exception::createImageRel32(const MCSymbol *Value) {
return MCSymbolRefExpr::Create(Value, MCSymbolRefExpr::VK_COFF_IMGREL32,
Asm->OutContext);
}
/// Emit the language-specific data that __C_specific_handler expects. This
/// handler lives in the x64 Microsoft C runtime and allows catching or cleaning
/// up after faults with __try, __except, and __finally. The typeinfo values
/// are not really RTTI data, but pointers to filter functions that return an
/// integer (1, 0, or -1) indicating how to handle the exception. For __finally
/// blocks and other cleanups, the landing pad label is zero, and the filter
/// function is actually a cleanup handler with the same prototype. A catch-all
/// entry is modeled with a null filter function field and a non-zero landing
/// pad label.
///
/// Possible filter function return values:
/// EXCEPTION_EXECUTE_HANDLER (1):
/// Jump to the landing pad label after cleanups.
/// EXCEPTION_CONTINUE_SEARCH (0):
/// Continue searching this table or continue unwinding.
/// EXCEPTION_CONTINUE_EXECUTION (-1):
/// Resume execution at the trapping PC.
///
/// Inferred table structure:
/// struct Table {
/// int NumEntries;
/// struct Entry {
/// imagerel32 LabelStart;
/// imagerel32 LabelEnd;
/// imagerel32 FilterOrFinally; // Zero means catch-all.
/// imagerel32 LabelLPad; // Zero means __finally.
/// } Entries[NumEntries];
/// };
void Win64Exception::emitCSpecificHandlerTable() {
const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
// Simplifying assumptions for first implementation:
// - Cleanups are not implemented.
// - Filters are not implemented.
// The Itanium LSDA table sorts similar landing pads together to simplify the
// actions table, but we don't need that.
SmallVector<const LandingPadInfo *, 64> LandingPads;
LandingPads.reserve(PadInfos.size());
for (const auto &LP : PadInfos)
LandingPads.push_back(&LP);
// Compute label ranges for call sites as we would for the Itanium LSDA, but
// use an all zero action table because we aren't using these actions.
SmallVector<unsigned, 64> FirstActions;
FirstActions.resize(LandingPads.size());
SmallVector<CallSiteEntry, 64> CallSites;
computeCallSiteTable(CallSites, LandingPads, FirstActions);
MCSymbol *EHFuncBeginSym =
Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
MCSymbol *EHFuncEndSym =
Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
// Emit the number of table entries.
unsigned NumEntries = 0;
for (const CallSiteEntry &CSE : CallSites) {
if (!CSE.LPad)
continue; // Ignore gaps.
for (int Selector : CSE.LPad->TypeIds) {
// Ignore C++ filter clauses in SEH.
// FIXME: Implement cleanup clauses.
if (isCatchEHSelector(Selector))
++NumEntries;
}
}
Asm->OutStreamer.EmitIntValue(NumEntries, 4);
// Emit the four-label records for each call site entry. The table has to be
// sorted in layout order, and the call sites should already be sorted.
for (const CallSiteEntry &CSE : CallSites) {
// Ignore gaps. Unlike the Itanium model, unwinding through a frame without
// an EH table entry will propagate the exception rather than terminating
// the program.
if (!CSE.LPad)
continue;
const LandingPadInfo *LPad = CSE.LPad;
// Compute the label range. We may reuse the function begin and end labels
// rather than forming new ones.
const MCExpr *Begin =
createImageRel32(CSE.BeginLabel ? CSE.BeginLabel : EHFuncBeginSym);
const MCExpr *End;
if (CSE.EndLabel) {
// The interval is half-open, so we have to add one to include the return
// address of the last invoke in the range.
End = MCBinaryExpr::CreateAdd(createImageRel32(CSE.EndLabel),
MCConstantExpr::Create(1, Asm->OutContext),
Asm->OutContext);
} else {
End = createImageRel32(EHFuncEndSym);
}
// These aren't really type info globals, they are actually pointers to
// filter functions ordered by selector. The zero selector is used for
// cleanups, so slot zero corresponds to selector 1.
const std::vector<const GlobalValue *> &SelectorToFilter = MMI->getTypeInfos();
// Do a parallel iteration across typeids and clause labels, skipping filter
// clauses.
assert(LPad->TypeIds.size() == LPad->ClauseLabels.size());
for (size_t I = 0, E = LPad->TypeIds.size(); I < E; ++I) {
// AddLandingPadInfo stores the clauses in reverse, but there is a FIXME
// to change that.
int Selector = LPad->TypeIds[E - I - 1];
MCSymbol *ClauseLabel = LPad->ClauseLabels[I];
// Ignore C++ filter clauses in SEH.
// FIXME: Implement cleanup clauses.
if (!isCatchEHSelector(Selector))
continue;
Asm->OutStreamer.EmitValue(Begin, 4);
Asm->OutStreamer.EmitValue(End, 4);
if (isCatchEHSelector(Selector)) {
assert(unsigned(Selector - 1) < SelectorToFilter.size());
const GlobalValue *TI = SelectorToFilter[Selector - 1];
if (TI) // Emit the filter function pointer.
Asm->OutStreamer.EmitValue(createImageRel32(Asm->getSymbol(TI)), 4);
else // Otherwise, this is a "catch i8* null", or catch all.
Asm->OutStreamer.EmitIntValue(0, 4);
}
Asm->OutStreamer.EmitValue(createImageRel32(ClauseLabel), 4);
}
}
}


@ -29,10 +29,6 @@ class Win64Exception : public EHStreamer {
/// Per-function flag to indicate if frame moves info should be emitted.
bool shouldEmitMoves;
void emitCSpecificHandlerTable();
const MCSymbolRefExpr *createImageRel32(const MCSymbol *Value);
public:
//===--------------------------------------------------------------------===//
// Main entry points.


@ -452,14 +452,6 @@ void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {
LP.TypeIds.push_back(0);
}
MCSymbol *
MachineModuleInfo::addClauseForLandingPad(MachineBasicBlock *LandingPad) {
MCSymbol *ClauseLabel = Context.CreateTempSymbol();
LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
LP.ClauseLabels.push_back(ClauseLabel);
return ClauseLabel;
}
/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
/// pads.
void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {


@ -449,9 +449,9 @@ void TargetPassConfig::addPassesToHandleExceptions() {
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
case ExceptionHandling::ItaniumWinEH:
case ExceptionHandling::MSVC: // FIXME: Needs preparation.
addPass(createDwarfEHPass(TM));
break;
case ExceptionHandling::MSVC: // FIXME: Add preparation.
case ExceptionHandling::None:
addPass(createLowerInvokePass());


@ -6544,19 +6544,15 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// If the input is a constant, let getNode fold it.
if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
SDValue Res = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);
if (Res.getNode() != N) {
if (!LegalOperations ||
TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
return Res;
// Folding it resulted in an illegal node, and it's too late to
// do that. Clean up the old node and forego the transformation.
// Ideally this won't happen very often, because instcombine
// and the earlier dagcombine runs (where illegal nodes are
// permitted) should have folded most of them already.
deleteAndRecombine(Res.getNode());
}
// If we can't allow illegal operations, we need to check that this is just
// a fp -> int or int -> conversion and that the resulting operation will
// be legal.
if (!LegalOperations ||
(isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
(isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
TLI.isOperationLegal(ISD::Constant, VT)))
return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);
}
// (conv (conv x, t1), t2) -> (conv x, t2)


@ -390,7 +390,8 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
if (Op.getOperand(j)
.getValueType()
.getVectorElementType()
.isFloatingPoint())
.isFloatingPoint() &&
NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j));
else
Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
@ -399,8 +400,9 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
}
Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands);
if (VT.isFloatingPoint() ||
(VT.isVector() && VT.getVectorElementType().isFloatingPoint()))
if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
(VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0));
else
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
@ -554,9 +556,9 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
BitOffset += SrcEltBits;
if (BitOffset >= WideBits) {
WideIdx++;
Offset -= WideBits;
if (Offset > 0) {
ShAmt = DAG.getConstant(SrcEltBits - Offset,
BitOffset -= WideBits;
if (BitOffset > 0) {
ShAmt = DAG.getConstant(SrcEltBits - BitOffset,
TLI.getShiftAmountTy(WideVT));
Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);


@ -2071,14 +2071,10 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
// Get the two live-in registers as SDValues. The physregs have already been
// copied into virtual registers.
SDValue Ops[2];
if (FuncInfo.ExceptionPointerVirtReg) {
Ops[0] = DAG.getZExtOrTrunc(
DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()),
getCurSDLoc(), ValueVTs[0]);
} else {
Ops[0] = DAG.getConstant(0, TLI.getPointerTy());
}
Ops[0] = DAG.getZExtOrTrunc(
DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()),
getCurSDLoc(), ValueVTs[0]);
Ops[1] = DAG.getZExtOrTrunc(
DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()),
@ -2090,27 +2086,6 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
setValue(&LP, Res);
}
unsigned
SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV,
MachineBasicBlock *LPadBB) {
SDValue Chain = getControlRoot();
// Get the typeid that we will dispatch on later.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy());
unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC);
unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV);
SDValue Sel = DAG.getConstant(TypeID, TLI.getPointerTy());
Chain = DAG.getCopyToReg(Chain, getCurSDLoc(), VReg, Sel);
// Branch to the main landing pad block.
MachineBasicBlock *ClauseMBB = FuncInfo.MBB;
ClauseMBB->addSuccessor(LPadBB);
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, Chain,
DAG.getBasicBlock(LPadBB)));
return VReg;
}
/// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for
/// small case ranges).
bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,


@ -713,8 +713,6 @@ public:
void visitJumpTable(JumpTable &JT);
void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
MachineBasicBlock *SwitchBB);
unsigned visitLandingPadClauseBB(GlobalValue *ClauseGV,
MachineBasicBlock *LPadMBB);
private:
// These all get lowered before this pass.


@ -19,7 +19,6 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@ -41,7 +40,6 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@ -909,8 +907,6 @@ void SelectionDAGISel::DoInstructionSelection() {
void SelectionDAGISel::PrepareEHLandingPad() {
MachineBasicBlock *MBB = FuncInfo->MBB;
const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy());
// Add a label to mark the beginning of the landing pad. Deletion of the
// landing pad can thus be detected via the MachineModuleInfo.
MCSymbol *Label = MF->getMMI().addLandingPad(MBB);
@ -922,66 +918,8 @@ void SelectionDAGISel::PrepareEHLandingPad() {
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
if (TM.getMCAsmInfo()->getExceptionHandlingType() ==
ExceptionHandling::MSVC) {
// Make virtual registers and a series of labels that fill in values for the
// clauses.
auto &RI = MF->getRegInfo();
FuncInfo->ExceptionSelectorVirtReg = RI.createVirtualRegister(PtrRC);
// Get all invoke BBs that will unwind into the clause BBs.
SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(),
MBB->pred_end());
// Emit separate machine basic blocks with separate labels for each clause
// before the main landing pad block.
const BasicBlock *LLVMBB = MBB->getBasicBlock();
const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst();
MachineInstrBuilder SelectorPHI = BuildMI(
*MBB, MBB->begin(), SDB->getCurDebugLoc(), TII->get(TargetOpcode::PHI),
FuncInfo->ExceptionSelectorVirtReg);
for (unsigned I = 0, E = LPadInst->getNumClauses(); I != E; ++I) {
MachineBasicBlock *ClauseBB = MF->CreateMachineBasicBlock(LLVMBB);
MF->insert(MBB, ClauseBB);
// Add the edge from the invoke to the clause.
for (MachineBasicBlock *InvokeBB : InvokeBBs)
InvokeBB->addSuccessor(ClauseBB);
// Mark the clause as a landing pad or MI passes will delete it.
ClauseBB->setIsLandingPad();
GlobalValue *ClauseGV = ExtractTypeInfo(LPadInst->getClause(I));
// Start the BB with a label.
MCSymbol *ClauseLabel = MF->getMMI().addClauseForLandingPad(MBB);
BuildMI(*ClauseBB, ClauseBB->begin(), SDB->getCurDebugLoc(), II)
.addSym(ClauseLabel);
// Construct a simple BB that defines a register with the typeid constant.
FuncInfo->MBB = ClauseBB;
FuncInfo->InsertPt = ClauseBB->end();
unsigned VReg = SDB->visitLandingPadClauseBB(ClauseGV, MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
// Add the typeid virtual register to the phi in the main landing pad.
SelectorPHI.addReg(VReg).addMBB(ClauseBB);
}
// Remove the edge from the invoke to the lpad.
for (MachineBasicBlock *InvokeBB : InvokeBBs)
InvokeBB->removeSuccessor(MBB);
// Restore FuncInfo back to its previous state and select the main landing
// pad block.
FuncInfo->MBB = MBB;
FuncInfo->InsertPt = MBB->end();
return;
}
// Mark exception register as live in.
const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy());
if (unsigned Reg = TLI->getExceptionPointerRegister())
FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);


@ -525,12 +525,15 @@ bool DISubprogram::Verify() const {
while ((IA = DL.getInlinedAt()))
DL = DebugLoc::getFromDILocation(IA);
DL.getScopeAndInlinedAt(Scope, IA);
if (!Scope)
return false;
assert(!IA);
while (!DIDescriptor(Scope).isSubprogram()) {
DILexicalBlockFile D(Scope);
Scope = D.isLexicalBlockFile()
? D.getScope()
: DebugLoc::getFromDILexicalBlock(Scope).getScope();
assert(Scope && "lexical block file has no scope");
}
if (!DISubprogram(Scope).describes(F))
return false;


@ -826,6 +826,28 @@ MDNode *MDNode::intersect(MDNode *A, MDNode *B) {
return getOrSelfReference(A->getContext(), MDs);
}
MDNode *MDNode::getMostGenericAliasScope(MDNode *A, MDNode *B) {
if (!A || !B)
return nullptr;
SmallVector<Metadata *, 4> MDs(B->op_begin(), B->op_end());
for (unsigned i = 0, ie = A->getNumOperands(); i != ie; ++i) {
Metadata *MD = A->getOperand(i);
bool insert = true;
for (unsigned j = 0, je = B->getNumOperands(); j != je; ++j)
if (MD == B->getOperand(j)) {
insert = false;
break;
}
if (insert)
MDs.push_back(MD);
}
// FIXME: This preserves long-standing behaviour, but is it really the right
// behaviour? Or was that an unintended side-effect of node uniquing?
return getOrSelfReference(A->getContext(), MDs);
}
MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) {
if (!A || !B)
return nullptr;


@ -708,9 +708,10 @@ VectorType::VectorType(Type *ElType, unsigned NumEl)
VectorType *VectorType::get(Type *elementType, unsigned NumElements) {
Type *ElementType = const_cast<Type*>(elementType);
assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0");
assert(isValidElementType(ElementType) &&
"Elements of a VectorType must be a primitive type");
assert(isValidElementType(ElementType) && "Element type of a VectorType must "
"be an integer, floating point, or "
"pointer type.");
LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
VectorType *&Entry = ElementType->getContext().pImpl
->VectorTypes[std::make_pair(ElementType, NumElements)];


@ -47,6 +47,10 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
}
OS << "\t.section\t" << getSectionName() << ",\"";
if (getCharacteristics() & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
OS << 'd';
if (getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
OS << 'b';
if (getCharacteristics() & COFF::IMAGE_SCN_MEM_EXECUTE)
OS << 'x';
if (getCharacteristics() & COFF::IMAGE_SCN_MEM_WRITE)
@ -55,10 +59,6 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
OS << 'r';
else
OS << 'y';
if (getCharacteristics() & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
OS << 'd';
if (getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
OS << 'b';
if (getCharacteristics() & COFF::IMAGE_SCN_LNK_REMOVE)
OS << 'n';
if (getCharacteristics() & COFF::IMAGE_SCN_MEM_SHARED)


@ -710,17 +710,22 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
CrossSection = &Symbol.getSection() != &B->getSection();
// Offset of the symbol in the section
int64_t a = Layout.getSymbolOffset(&B_SD);
int64_t OffsetOfB = Layout.getSymbolOffset(&B_SD);
// Offset of the relocation in the section
int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
FixedValue = b - a;
// In the case where we have SymbA and SymB, we just need to store the delta
// between the two symbols. Update FixedValue to account for the delta, and
// skip recording the relocation.
if (!CrossSection)
if (!CrossSection) {
int64_t OffsetOfA = Layout.getSymbolOffset(&A_SD);
FixedValue = (OffsetOfA - OffsetOfB) + Target.getConstant();
return;
}
// Offset of the relocation in the section
int64_t OffsetOfRelocation =
Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
FixedValue = OffsetOfRelocation - OffsetOfB;
} else {
FixedValue = Target.getConstant();
}


@ -49,6 +49,14 @@
#include "regcclass.h"
#include "regcname.h"
#include "llvm/Config/config.h"
#if HAVE_STDINT_H
#include <stdint.h>
#else
/* Pessimistically bound memory use */
#define SIZE_MAX UINT_MAX
#endif
/*
* parse structure, passed up and down to avoid global variables and
* other clumsinesses
@ -1069,6 +1077,8 @@ allocset(struct parse *p)
p->ncsalloc += CHAR_BIT;
nc = p->ncsalloc;
if (nc > SIZE_MAX / sizeof(cset))
goto nomem;
assert(nc % CHAR_BIT == 0);
nbytes = nc / CHAR_BIT * css;
@ -1412,6 +1422,11 @@ enlarge(struct parse *p, sopno size)
if (p->ssize >= size)
return;
if ((unsigned long)size > SIZE_MAX / sizeof(sop)) {
SETERROR(REG_ESPACE);
return;
}
sp = (sop *)realloc(p->strip, size*sizeof(sop));
if (sp == NULL) {
SETERROR(REG_ESPACE);
@ -1428,6 +1443,12 @@ static void
stripsnug(struct parse *p, struct re_guts *g)
{
g->nstates = p->slen;
if ((unsigned long)p->slen > SIZE_MAX / sizeof(sop)) {
g->strip = p->strip;
SETERROR(REG_ESPACE);
return;
}
g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
if (g->strip == NULL) {
SETERROR(REG_ESPACE);


@ -6287,6 +6287,8 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
AArch64CC::CondCode CC, bool NoNans, EVT VT,
SDLoc dl, SelectionDAG &DAG) {
EVT SrcVT = LHS.getValueType();
assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
"function only supposed to emit natural comparisons");
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
APInt CnstBits(VT.getSizeInBits(), 0);
@ -6381,13 +6383,15 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
SDLoc dl(Op);
if (LHS.getValueType().getVectorElementType().isInteger()) {
assert(LHS.getValueType() == RHS.getValueType());
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
return EmitVectorComparison(LHS, RHS, AArch64CC, false, Op.getValueType(),
dl, DAG);
SDValue Cmp =
EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
}
assert(LHS.getValueType().getVectorElementType() == MVT::f32 ||
@ -6401,19 +6405,21 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
SDValue Cmp =
EmitVectorComparison(LHS, RHS, CC1, NoNaNs, Op.getValueType(), dl, DAG);
EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
if (!Cmp.getNode())
return SDValue();
if (CC2 != AArch64CC::AL) {
SDValue Cmp2 =
EmitVectorComparison(LHS, RHS, CC2, NoNaNs, Op.getValueType(), dl, DAG);
EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
if (!Cmp2.getNode())
return SDValue();
Cmp = DAG.getNode(ISD::OR, dl, Cmp.getValueType(), Cmp, Cmp2);
Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
}
Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
if (ShouldInvert)
return Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());


@ -2400,7 +2400,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
// Conservatively refuse to convert an instruction which isn't in the same
// BB as the comparison.
// For CMPri, we need to check Sub, thus we can't return here.
// For CMPri w/ CmpValue != 0, a Sub may still be a candidate.
// Thus we cannot return here.
if (CmpInstr->getOpcode() == ARM::CMPri ||
CmpInstr->getOpcode() == ARM::t2CMPri)
MI = nullptr;
@ -2479,8 +2480,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
case ARM::t2EORrr:
case ARM::t2EORri: {
// Scan forward for the use of CPSR
// When checking against MI: if it's a conditional code requires
// checking of V bit, then this is not safe to do.
// When checking against MI: if it's a conditional code that requires
// checking of the V bit or C bit, then this is not safe to do.
// It is safe to remove CmpInstr if CPSR is redefined or killed.
// If we are done with the basic block, we need to check whether CPSR is
// live-out.
@ -2547,19 +2548,30 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
OperandsToUpdate.push_back(
std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
}
} else
} else {
// No Sub, so this is x = <op> y, z; cmp x, 0.
switch (CC) {
default:
case ARMCC::EQ: // Z
case ARMCC::NE: // Z
case ARMCC::MI: // N
case ARMCC::PL: // N
case ARMCC::AL: // none
// CPSR can be used multiple times, we should continue.
break;
case ARMCC::VS:
case ARMCC::VC:
case ARMCC::GE:
case ARMCC::LT:
case ARMCC::GT:
case ARMCC::LE:
case ARMCC::HS: // C
case ARMCC::LO: // C
case ARMCC::VS: // V
case ARMCC::VC: // V
case ARMCC::HI: // C Z
case ARMCC::LS: // C Z
case ARMCC::GE: // N V
case ARMCC::LT: // N V
case ARMCC::GT: // Z N V
case ARMCC::LE: // Z N V
// The instruction uses the V bit or C bit which is not safe.
return false;
}
}
}
}


@ -565,7 +565,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM)
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FDIV);
setTargetDAGCombine(ISD::LOAD);
// It is legal to extload from v4i8 to v4i16 or v4i32.
MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
@ -4488,6 +4487,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
EVT VT = Op.getValueType();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDLoc dl(Op);
@ -4517,8 +4517,8 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
TmpOp0 = Op0;
TmpOp1 = Op1;
Opc = ISD::OR;
Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
break;
case ISD::SETUO: Invert = true; // Fallthrough
case ISD::SETO:
@ -4526,8 +4526,8 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
TmpOp0 = Op0;
TmpOp1 = Op1;
Opc = ISD::OR;
Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
break;
}
} else {
@ -4561,8 +4561,8 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
Opc = ARMISD::VTST;
Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
Invert = !Invert;
}
}
@ -4588,22 +4588,24 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
if (SingleOp.getNode()) {
switch (Opc) {
case ARMISD::VCEQ:
Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCGE:
Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCLEZ:
Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCGT:
Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCLTZ:
Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
default:
Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
}
} else {
Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
}
Result = DAG.getSExtOrTrunc(Result, dl, VT);
if (Invert)
Result = DAG.getNOT(dl, Result, VT);
@ -8877,18 +8879,17 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
DAG.getUNDEF(VT), NewMask.data());
}
/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
/// NEON load/store intrinsics, and generic vector load/stores, to merge
/// base address updates.
/// For generic load/stores, the memory type is assumed to be a vector.
/// The caller is assumed to have checked legality.
/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
/// NEON load/store intrinsics to merge base address updates.
static SDValue CombineBaseUpdate(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
bool isStore = N->getOpcode() == ISD::STORE;
unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
SDValue Addr = N->getOperand(AddrOpIdx);
// Search for a use of the address operand that is an increment.
@ -8949,10 +8950,6 @@ static SDValue CombineBaseUpdate(SDNode *N,
case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD;
NumVecs = 1; isLaneOp = false; break;
case ISD::STORE: NewOpc = ARMISD::VST1_UPD;
NumVecs = 1; isLoad = false; isLaneOp = false; break;
}
}
@ -8960,11 +8957,8 @@ static SDValue CombineBaseUpdate(SDNode *N,
EVT VecTy;
if (isLoad)
VecTy = N->getValueType(0);
else if (isIntrinsic)
VecTy = N->getOperand(AddrOpIdx+1).getValueType();
else
VecTy = N->getOperand(1).getValueType();
VecTy = N->getOperand(AddrOpIdx+1).getValueType();
unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
if (isLaneOp)
NumBytes /= VecTy.getVectorNumElements();
@ -8981,70 +8975,25 @@ static SDValue CombineBaseUpdate(SDNode *N,
continue;
}
EVT AlignedVecTy = VecTy;
// If this is a less-than-standard-aligned load/store, change the type to
// match the standard alignment.
// The alignment is overlooked when selecting _UPD variants; and it's
// easier to introduce bitcasts here than fix that.
// There are 3 ways to get to this base-update combine:
// - intrinsics: they are assumed to be properly aligned (to the standard
// alignment of the memory type), so we don't need to do anything.
// - ARMISD::VLDx nodes: they are only generated from the aforementioned
// intrinsics, so, likewise, there's nothing to do.
// - generic load/store instructions: the alignment is specified as an
// explicit operand, rather than implicitly as the standard alignment
// of the memory type (like the intrisics). We need to change the
// memory type to match the explicit alignment. That way, we don't
// generate non-standard-aligned ARMISD::VLDx nodes.
if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N)) {
unsigned Alignment = LSN->getAlignment();
if (Alignment == 0)
Alignment = 1;
if (Alignment < VecTy.getScalarSizeInBits() / 8) {
MVT EltTy = MVT::getIntegerVT(Alignment * 8);
assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
assert(!isLaneOp && "Unexpected generic load/store lane.");
unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
}
}
// Create the new updating load/store node.
// First, create an SDVTList for the new updating node's results.
EVT Tys[6];
unsigned NumResultVecs = (isLoad ? NumVecs : 0);
unsigned n;
for (n = 0; n < NumResultVecs; ++n)
Tys[n] = AlignedVecTy;
Tys[n] = VecTy;
Tys[n++] = MVT::i32;
Tys[n] = MVT::Other;
SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
// Then, gather the new node's operands.
SmallVector<SDValue, 8> Ops;
Ops.push_back(N->getOperand(0)); // incoming chain
Ops.push_back(N->getOperand(AddrOpIdx));
Ops.push_back(Inc);
if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
// Try to match the intrinsic's signature
Ops.push_back(StN->getValue());
Ops.push_back(DAG.getConstant(StN->getAlignment(), MVT::i32));
} else {
for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i)
Ops.push_back(N->getOperand(i));
for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
Ops.push_back(N->getOperand(i));
}
// If this is a non-standard-aligned store, the penultimate operand is the
// stored value. Bitcast it to the aligned type.
if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
SDValue &StVal = Ops[Ops.size()-2];
StVal = DAG.getNode(ISD::BITCAST, SDLoc(N), AlignedVecTy, StVal);
}
MemSDNode *MemInt = cast<MemSDNode>(N);
MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
Ops, AlignedVecTy,
Ops, MemInt->getMemoryVT(),
MemInt->getMemOperand());
// Update the uses.
@ -9052,14 +9001,6 @@ static SDValue CombineBaseUpdate(SDNode *N,
for (unsigned i = 0; i < NumResultVecs; ++i) {
NewResults.push_back(SDValue(UpdN.getNode(), i));
}
// If this is an non-standard-aligned load, the first result is the loaded
// value. Bitcast it to the expected result type.
if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
SDValue &LdVal = NewResults[0];
LdVal = DAG.getNode(ISD::BITCAST, SDLoc(N), VecTy, LdVal);
}
NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
DCI.CombineTo(N, NewResults);
DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
@ -9069,14 +9010,6 @@ static SDValue CombineBaseUpdate(SDNode *N,
return SDValue();
}
static SDValue PerformVLDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
return CombineBaseUpdate(N, DCI);
}
/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
@ -9190,18 +9123,6 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
}
static SDValue PerformLOADCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
EVT VT = N->getValueType(0);
// If this is a legal vector load, try to combine it into a VLD1_UPD.
if (ISD::isNormalLoad(N) && VT.isVector() &&
DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
return CombineBaseUpdate(N, DCI);
return SDValue();
}
/// PerformSTORECombine - Target-specific dag combine xforms for
/// ISD::STORE.
static SDValue PerformSTORECombine(SDNode *N,
@ -9340,11 +9261,6 @@ static SDValue PerformSTORECombine(SDNode *N,
St->getAAInfo());
}
// If this is a legal vector store, try to combine it into a VST1_UPD.
if (ISD::isNormalStore(N) && VT.isVector() &&
DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
return CombineBaseUpdate(N, DCI);
return SDValue();
}
@ -9938,11 +9854,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
case ISD::LOAD: return PerformLOADCombine(N, DCI);
case ARMISD::VLD2DUP:
case ARMISD::VLD3DUP:
case ARMISD::VLD4DUP:
return PerformVLDCombine(N, DCI);
return CombineBaseUpdate(N, DCI);
case ARMISD::BUILD_VECTOR:
return PerformARMBUILD_VECTORCombine(N, DCI);
case ISD::INTRINSIC_VOID:
@ -9962,7 +9877,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane:
return PerformVLDCombine(N, DCI);
return CombineBaseUpdate(N, DCI);
default: break;
}
break;

View File

@ -9195,34 +9195,48 @@ static const struct {
const uint64_t Enabled;
const uint64_t Disabled;
} FPUs[] = {
{ARM::VFP, ARM::FeatureVFP2, ARM::FeatureNEON},
{ARM::VFPV2, ARM::FeatureVFP2, ARM::FeatureNEON},
{ARM::VFPV3, ARM::FeatureVFP2 | ARM::FeatureVFP3, ARM::FeatureNEON},
{ARM::VFPV3_D16, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureD16,
ARM::FeatureNEON},
{ARM::VFPV4, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4,
ARM::FeatureNEON},
{ARM::VFPV4_D16,
ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | ARM::FeatureD16,
ARM::FeatureNEON},
{ARM::FPV5_D16, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
ARM::FeatureFPARMv8 | ARM::FeatureD16,
ARM::FeatureNEON | ARM::FeatureCrypto},
{ARM::FP_ARMV8, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
ARM::FeatureFPARMv8,
ARM::FeatureNEON | ARM::FeatureCrypto},
{ARM::NEON, ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureNEON, 0},
{ARM::NEON_VFPV4,
ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 | ARM::FeatureNEON,
0},
{ARM::NEON_FP_ARMV8,
ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
{/* ID */ ARM::VFP,
/* Enabled */ ARM::FeatureVFP2,
/* Disabled */ ARM::FeatureNEON},
{/* ID */ ARM::VFPV2,
/* Enabled */ ARM::FeatureVFP2,
/* Disabled */ ARM::FeatureNEON},
{/* ID */ ARM::VFPV3,
/* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3,
/* Disabled */ ARM::FeatureNEON | ARM::FeatureD16},
{/* ID */ ARM::VFPV3_D16,
/* Enable */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureD16,
/* Disabled */ ARM::FeatureNEON},
{/* ID */ ARM::VFPV4,
/* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4,
/* Disabled */ ARM::FeatureNEON | ARM::FeatureD16},
{/* ID */ ARM::VFPV4_D16,
/* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
ARM::FeatureD16,
/* Disabled */ ARM::FeatureNEON},
{/* ID */ ARM::FPV5_D16,
/* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
ARM::FeatureFPARMv8 | ARM::FeatureD16,
/* Disabled */ ARM::FeatureNEON | ARM::FeatureCrypto},
{/* ID */ ARM::FP_ARMV8,
/* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
ARM::FeatureFPARMv8,
/* Disabled */ ARM::FeatureNEON | ARM::FeatureCrypto | ARM::FeatureD16},
{/* ID */ ARM::NEON,
/* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureNEON,
/* Disabled */ ARM::FeatureD16},
{/* ID */ ARM::NEON_VFPV4,
/* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
ARM::FeatureNEON,
/* Disabled */ ARM::FeatureD16},
{/* ID */ ARM::NEON_FP_ARMV8,
/* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
ARM::FeatureFPARMv8 | ARM::FeatureNEON,
ARM::FeatureCrypto},
{ARM::CRYPTO_NEON_FP_ARMV8,
ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
/* Disabled */ ARM::FeatureCrypto | ARM::FeatureD16},
{/* ID */ ARM::CRYPTO_NEON_FP_ARMV8,
/* Enabled */ ARM::FeatureVFP2 | ARM::FeatureVFP3 | ARM::FeatureVFP4 |
ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto,
0},
/* Disabled */ ARM::FeatureD16},
{ARM::SOFTVFP, 0, 0},
};


@ -3134,7 +3134,8 @@ def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src),
"icbi $src", IIC_LdStICBI, []>;
def EIEIO : XForm_24_eieio<31, 854, (outs), (ins),
// We used to have EIEIO as value but E[0-9A-Z] is a reserved name
def EnforceIEIO : XForm_24_eieio<31, 854, (outs), (ins),
"eieio", IIC_LdStLoad, []>;
def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L),


@ -100,7 +100,7 @@ bool AMDGPUTTI::hasBranchDivergence() const { return true; }
void AMDGPUTTI::getUnrollingPreferences(const Function *, Loop *L,
UnrollingPreferences &UP) const {
UP.Threshold = 300; // Twice the default.
UP.Count = UINT_MAX;
UP.MaxCount = UINT_MAX;
UP.Partial = true;
// TODO: Do we want runtime unrolling?


@ -14,6 +14,7 @@
#include "AMDGPU.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
@ -66,6 +67,8 @@ class SIAnnotateControlFlow : public FunctionPass {
DominatorTree *DT;
StackVector Stack;
LoopInfo *LI;
bool isTopOfStack(BasicBlock *BB);
Value *popSaved();
@ -99,6 +102,7 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LoopInfo>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
@ -277,10 +281,25 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
push(Term->getSuccessor(0), Arg);
}
/// \brief Close the last opened control flow
}/// \brief Close the last opened control flow
void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
llvm::Loop *L = LI->getLoopFor(BB);
if (L && L->getHeader() == BB) {
// We can't insert an EndCF call into a loop header, because it will
// get executed on every iteration of the loop, when it should be
// executed only once before the loop.
SmallVector <BasicBlock*, 8> Latches;
L->getLoopLatches(Latches);
std::vector<BasicBlock*> Preds;
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
if (std::find(Latches.begin(), Latches.end(), *PI) == Latches.end())
Preds.push_back(*PI);
}
BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", this);
}
CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt());
}
@ -288,6 +307,7 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
/// recognize if/then/else and loops.
bool SIAnnotateControlFlow::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfo>();
for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
E = df_end(&F.getEntryBlock()); I != E; ++I) {


@ -266,6 +266,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
break;
case AMDGPU::SI_SPILL_V32_RESTORE:
case AMDGPU::SI_SPILL_V64_RESTORE:
case AMDGPU::SI_SPILL_V96_RESTORE:
case AMDGPU::SI_SPILL_V128_RESTORE:
case AMDGPU::SI_SPILL_V256_RESTORE:
case AMDGPU::SI_SPILL_V512_RESTORE: {


@ -132,9 +132,9 @@ def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
"Enable XOP instructions",
[FeatureFMA4]>;
def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
"HasVectorUAMem", "true",
"Allow unaligned memory operands on vector/SIMD instructions">;
def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
"HasSSEUnalignedMem", "true",
"Allow unaligned memory operands with SSE instructions">;
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
"Enable AES instructions",
[FeatureSSE2]>;
@ -309,7 +309,6 @@ class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureCMPXCHG16B,
FeatureFastUAMem,
FeatureSlowUAMem32,
FeatureVectorUAMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL
@ -322,7 +321,6 @@ class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureCMPXCHG16B,
FeatureFastUAMem,
FeatureSlowUAMem32,
FeatureVectorUAMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
@ -337,7 +335,6 @@ class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureAVX2,
FeatureCMPXCHG16B,
FeatureFastUAMem,
FeatureVectorUAMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
@ -360,7 +357,6 @@ class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureAVX2,
FeatureCMPXCHG16B,
FeatureFastUAMem,
FeatureVectorUAMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
@ -388,7 +384,7 @@ class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,
FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
FeatureSlowIncDec, FeatureVectorUAMem]>;
FeatureSlowIncDec]>;
def : KnightsLandingProc<"knl">;
// FIXME: define SKX model
@ -399,7 +395,7 @@ class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel,
FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
FeatureSlowIncDec, FeatureSGX, FeatureVectorUAMem]>;
FeatureSlowIncDec, FeatureSGX]>;
def : SkylakeProc<"skylake">;
def : SkylakeProc<"skx">; // Legacy alias.


@ -688,11 +688,11 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
for (const auto &Function : M)
if (Function.hasDLLExportStorageClass())
if (Function.hasDLLExportStorageClass() && !Function.isDeclaration())
DLLExportedFns.push_back(getSymbol(&Function));
for (const auto &Global : M.globals())
if (Global.hasDLLExportStorageClass())
if (Global.hasDLLExportStorageClass() && !Global.isDeclaration())
DLLExportedGlobals.push_back(getSymbol(&Global));
for (const auto &Alias : M.aliases()) {


@ -5473,6 +5473,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
DecodePSHUFBMask(C, Mask);
if (Mask.empty())
return false;
break;
}


@ -424,7 +424,7 @@ def alignedloadv8i64 : PatFrag<(ops node:$ptr),
// setting a feature bit in the processor (on startup, for example).
// Opteron 10h and later implement such a feature.
def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return Subtarget->hasVectorUAMem()
return Subtarget->hasSSEUnalignedMem()
|| cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;


@ -265,7 +265,7 @@ void X86Subtarget::initializeEnvironment() {
IsSHLDSlow = false;
IsUAMemFast = false;
IsUAMem32Slow = false;
HasVectorUAMem = false;
HasSSEUnalignedMem = false;
HasCmpxchg16b = false;
UseLeaForSP = false;
HasSlowDivide32 = false;


@ -162,9 +162,9 @@ protected:
/// True if unaligned 32-byte memory accesses are slow.
bool IsUAMem32Slow;
/// HasVectorUAMem - True if SIMD operations can have unaligned memory
/// operands. This may require setting a feature bit in the processor.
bool HasVectorUAMem;
/// True if SSE operations can have unaligned memory operands.
/// This may require setting a configuration bit in the processor.
bool HasSSEUnalignedMem;
/// HasCmpxchg16b - True if this processor has the CMPXCHG16B instruction;
/// this is true for most x86-64 chips, but not the first AMD chips.
@ -378,7 +378,7 @@ public:
bool isSHLDSlow() const { return IsSHLDSlow; }
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
bool hasVectorUAMem() const { return HasVectorUAMem; }
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
bool useLeaForSP() const { return UseLeaForSP; }
bool hasSlowDivide32() const { return HasSlowDivide32; }


@ -330,11 +330,17 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
case LLVMContext::MD_noalias:
case LLVMContext::MD_nontemporal:
case LLVMContext::MD_mem_parallel_loop_access:
case LLVMContext::MD_nonnull:
// All of these directly apply.
NewLoad->setMetadata(ID, N);
break;
case LLVMContext::MD_nonnull:
// FIXME: We should translate this into range metadata for integer types
// and vice versa.
if (NewTy->isPointerTy())
NewLoad->setMetadata(ID, N);
break;
case LLVMContext::MD_range:
// FIXME: It would be nice to propagate this in some way, but the type
// conversions make it hard.
@ -548,13 +554,14 @@ static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
case LLVMContext::MD_noalias:
case LLVMContext::MD_nontemporal:
case LLVMContext::MD_mem_parallel_loop_access:
case LLVMContext::MD_nonnull:
// All of these directly apply.
NewStore->setMetadata(ID, N);
break;
case LLVMContext::MD_invariant_load:
case LLVMContext::MD_nonnull:
case LLVMContext::MD_range:
// These don't apply for stores.
break;
}
}
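The FIXME in the load hunk above only states the intent; as a rough illustration (a sketch in 3.6-era IR with made-up values %slot and %islot), the !nonnull hint on a pointer load corresponds to a wrap-around !range on an integer load of the same width:

  %p = load i8** %slot, !nonnull !0      ; pointer load known to be non-null
  %v = load i64* %islot, !range !1       ; the same fact expressed on an integer load
  !0 = !{}
  !1 = !{i64 1, i64 0}                   ; every i64 value except 0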


@ -67,7 +67,7 @@ static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000;
static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 36;
static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30;
static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46;
static const uint64_t kWindowsShadowOffset32 = 1ULL << 30;
static const uint64_t kWindowsShadowOffset32 = 3ULL << 28;
static const size_t kMinStackMallocSize = 1 << 6; // 64B
static const size_t kMaxStackMallocSize = 1 << 16; // 64K
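For reference, 1ULL << 30 is 0x40000000 while 3ULL << 28 is 0x30000000, so the 32-bit Windows ASan shadow base ends up 0x10000000 (256 MiB) lower than before.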


@ -71,9 +71,17 @@ private:
return isMachO() ? "__DATA,__llvm_prf_data" : "__llvm_prf_data";
}
/// Get the section name for the coverage mapping data.
StringRef getCoverageSection() const {
return isMachO() ? "__DATA,__llvm_covmap" : "__llvm_covmap";
}
/// Replace instrprof_increment with an increment of the appropriate value.
void lowerIncrement(InstrProfIncrementInst *Inc);
/// Set up the section and uses for coverage data and its references.
void lowerCoverageData(GlobalVariable *CoverageData);
/// Get the region counters for an increment, creating them if necessary.
///
/// If the counter array doesn't yet exist, the profile data variables
@ -118,6 +126,10 @@ bool InstrProfiling::runOnModule(Module &M) {
lowerIncrement(Inc);
MadeChange = true;
}
if (GlobalVariable *Coverage = M.getNamedGlobal("__llvm_coverage_mapping")) {
lowerCoverageData(Coverage);
MadeChange = true;
}
if (!MadeChange)
return false;
@ -140,6 +152,35 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
Inc->eraseFromParent();
}
void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) {
CoverageData->setSection(getCoverageSection());
CoverageData->setAlignment(8);
Constant *Init = CoverageData->getInitializer();
// We're expecting { i32, i32, i32, i32, [n x { i8*, i32, i32 }], [m x i8] }
// for some n and m. If not, the frontend's given us something broken.
assert(Init->getNumOperands() == 6 && "bad number of fields in coverage map");
assert(isa<ConstantArray>(Init->getAggregateElement(4)) &&
"invalid function list in coverage map");
ConstantArray *Records = cast<ConstantArray>(Init->getAggregateElement(4));
for (unsigned I = 0, E = Records->getNumOperands(); I < E; ++I) {
Constant *Record = Records->getOperand(I);
Value *V = const_cast<Value *>(Record->getOperand(0))->stripPointerCasts();
assert(isa<GlobalVariable>(V) && "Missing reference to function name");
GlobalVariable *Name = cast<GlobalVariable>(V);
// If we have region counters for this name, we've already handled it.
auto It = RegionCounters.find(Name);
if (It != RegionCounters.end())
continue;
// Move the name variable to the right section.
Name->setSection(getNameSection());
Name->setAlignment(1);
}
}
/// Get the name of a profiling variable for a particular function.
static std::string getVarName(InstrProfIncrementInst *Inc, StringRef VarName) {
auto *Arr = cast<ConstantDataArray>(Inc->getName()->getInitializer());


@ -631,7 +631,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (SI.isAtomic()) SI.setOrdering(addReleaseOrdering(SI.getOrdering()));
if (MS.TrackOrigins)
if (MS.TrackOrigins && !SI.isAtomic())
storeOrigin(IRB, Addr, Shadow, getOrigin(Val), SI.getAlignment(),
InstrumentWithCalls);
}


@ -480,6 +480,9 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// Ignore volatile loads.
if (!LI->isSimple()) {
LastStore = nullptr;
// Don't CSE across synchronization boundaries.
if (Inst->mayWriteToMemory())
++CurrentGeneration;
continue;
}
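A minimal sketch of the pattern this guards against (made-up values %p and %flag, 3.6-era IR): an ordered atomic load is a synchronization point, so a later load of the same address must not be CSE'd with an earlier one across it, and bumping the generation keeps the two loads distinct:

  %a = load i32* %p
  %sync = load atomic i32* %flag acquire, align 4   ; not simple; mayWriteToMemory() is true
  %b = load i32* %p                                 ; must remain a separate load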


@ -750,6 +750,16 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// its dependence information by changing its parameter.
MD->removeInstruction(C);
// Update AA metadata
// FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
// handled here, but combineMetadata doesn't support them yet
unsigned KnownIDs[] = {
LLVMContext::MD_tbaa,
LLVMContext::MD_alias_scope,
LLVMContext::MD_noalias,
};
combineMetadata(C, cpy, KnownIDs);
// Remove the memcpy.
MD->removeInstruction(cpy);
++NumMemCpyInstr;

View File

@ -1328,6 +1328,8 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsign
K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD));
break;
case LLVMContext::MD_alias_scope:
K->setMetadata(Kind, MDNode::getMostGenericAliasScope(JMD, KMD));
break;
case LLVMContext::MD_noalias:
K->setMetadata(Kind, MDNode::intersect(JMD, KMD));
break;


@ -154,19 +154,21 @@ static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD) {
return mapToMetadata(VM, MD, const_cast<Metadata *>(MD));
}
static Metadata *MapMetadataImpl(const Metadata *MD, ValueToValueMapTy &VM,
RemapFlags Flags,
static Metadata *MapMetadataImpl(const Metadata *MD,
SmallVectorImpl<UniquableMDNode *> &Cycles,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer);
static Metadata *mapMetadataOp(Metadata *Op, ValueToValueMapTy &VM,
RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
static Metadata *mapMetadataOp(Metadata *Op,
SmallVectorImpl<UniquableMDNode *> &Cycles,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
if (!Op)
return nullptr;
if (Metadata *MappedOp =
MapMetadataImpl(Op, VM, Flags, TypeMapper, Materializer))
MapMetadataImpl(Op, Cycles, VM, Flags, TypeMapper, Materializer))
return MappedOp;
// Use identity map if MappedOp is null and we can ignore missing entries.
if (Flags & RF_IgnoreMissingEntries)
@ -180,8 +182,9 @@ static Metadata *mapMetadataOp(Metadata *Op, ValueToValueMapTy &VM,
return nullptr;
}
static Metadata *cloneMDTuple(const MDTuple *Node, ValueToValueMapTy &VM,
RemapFlags Flags,
static Metadata *cloneMDTuple(const MDTuple *Node,
SmallVectorImpl<UniquableMDNode *> &Cycles,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer,
bool IsDistinct) {
@ -192,41 +195,57 @@ static Metadata *cloneMDTuple(const MDTuple *Node, ValueToValueMapTy &VM,
SmallVector<Metadata *, 4> Elts;
Elts.reserve(Node->getNumOperands());
for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I)
Elts.push_back(mapMetadataOp(Node->getOperand(I), VM, Flags, TypeMapper,
Materializer));
Elts.push_back(mapMetadataOp(Node->getOperand(I), Cycles, VM, Flags,
TypeMapper, Materializer));
return MDTuple::get(Node->getContext(), Elts);
}
static Metadata *cloneMDLocation(const MDLocation *Node, ValueToValueMapTy &VM,
RemapFlags Flags,
static Metadata *cloneMDLocation(const MDLocation *Node,
SmallVectorImpl<UniquableMDNode *> &Cycles,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer,
bool IsDistinct) {
return (IsDistinct ? MDLocation::getDistinct : MDLocation::get)(
Node->getContext(), Node->getLine(), Node->getColumn(),
mapMetadataOp(Node->getScope(), VM, Flags, TypeMapper, Materializer),
mapMetadataOp(Node->getInlinedAt(), VM, Flags, TypeMapper, Materializer));
mapMetadataOp(Node->getScope(), Cycles, VM, Flags, TypeMapper,
Materializer),
mapMetadataOp(Node->getInlinedAt(), Cycles, VM, Flags, TypeMapper,
Materializer));
}
static Metadata *cloneMDNode(const UniquableMDNode *Node, ValueToValueMapTy &VM,
RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
static Metadata *cloneMDNode(const UniquableMDNode *Node,
SmallVectorImpl<UniquableMDNode *> &Cycles,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer, bool IsDistinct) {
switch (Node->getMetadataID()) {
default:
llvm_unreachable("Invalid UniquableMDNode subclass");
#define HANDLE_UNIQUABLE_LEAF(CLASS) \
case Metadata::CLASS##Kind: \
return clone##CLASS(cast<CLASS>(Node), VM, Flags, TypeMapper, \
return clone##CLASS(cast<CLASS>(Node), Cycles, VM, Flags, TypeMapper, \
Materializer, IsDistinct);
#include "llvm/IR/Metadata.def"
}
}
static void
trackCyclesUnderDistinct(const UniquableMDNode *Node,
SmallVectorImpl<UniquableMDNode *> &Cycles) {
// Track any cycles beneath this node.
for (Metadata *Op : Node->operands())
if (auto *N = dyn_cast_or_null<UniquableMDNode>(Op))
if (!N->isResolved())
Cycles.push_back(N);
}
/// \brief Map a distinct MDNode.
///
/// Distinct nodes are not uniqued, so they must always be recreated.
static Metadata *mapDistinctNode(const UniquableMDNode *Node,
SmallVectorImpl<UniquableMDNode *> &Cycles,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
@ -241,9 +260,11 @@ static Metadata *mapDistinctNode(const UniquableMDNode *Node,
// Fix the operands.
for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I)
NewMD->replaceOperandWith(I, mapMetadataOp(Node->getOperand(I), VM, Flags,
TypeMapper, Materializer));
NewMD->replaceOperandWith(I,
mapMetadataOp(Node->getOperand(I), Cycles, VM,
Flags, TypeMapper, Materializer));
trackCyclesUnderDistinct(NewMD, Cycles);
return NewMD;
}
@ -252,9 +273,11 @@ static Metadata *mapDistinctNode(const UniquableMDNode *Node,
std::unique_ptr<MDNodeFwdDecl> Dummy(
MDNode::getTemporary(Node->getContext(), None));
mapToMetadata(VM, Node, Dummy.get());
Metadata *NewMD = cloneMDNode(Node, VM, Flags, TypeMapper, Materializer,
/* IsDistinct */ true);
auto *NewMD = cast<UniquableMDNode>(cloneMDNode(Node, Cycles, VM, Flags,
TypeMapper, Materializer,
/* IsDistinct */ true));
Dummy->replaceAllUsesWith(NewMD);
trackCyclesUnderDistinct(NewMD, Cycles);
return mapToMetadata(VM, Node, NewMD);
}
@ -263,13 +286,14 @@ static Metadata *mapDistinctNode(const UniquableMDNode *Node,
/// Check whether a uniqued node needs to be remapped (due to any operands
/// changing).
static bool shouldRemapUniquedNode(const UniquableMDNode *Node,
SmallVectorImpl<UniquableMDNode *> &Cycles,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
// Check all operands to see if any need to be remapped.
for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) {
Metadata *Op = Node->getOperand(I);
if (Op != mapMetadataOp(Op, VM, Flags, TypeMapper, Materializer))
if (Op != mapMetadataOp(Op, Cycles, VM, Flags, TypeMapper, Materializer))
return true;
}
return false;
@ -279,9 +303,10 @@ static bool shouldRemapUniquedNode(const UniquableMDNode *Node,
///
/// Uniqued nodes may not need to be recreated (they may map to themselves).
static Metadata *mapUniquedNode(const UniquableMDNode *Node,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
SmallVectorImpl<UniquableMDNode *> &Cycles,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
assert(!Node->isDistinct() && "Expected uniqued node");
// Create a dummy node in case we have a metadata cycle.
@ -289,7 +314,8 @@ static Metadata *mapUniquedNode(const UniquableMDNode *Node,
mapToMetadata(VM, Node, Dummy);
// Check all operands to see if any need to be remapped.
if (!shouldRemapUniquedNode(Node, VM, Flags, TypeMapper, Materializer)) {
if (!shouldRemapUniquedNode(Node, Cycles, VM, Flags, TypeMapper,
Materializer)) {
// Use an identity mapping.
mapToSelf(VM, Node);
MDNode::deleteTemporary(Dummy);
@ -297,15 +323,17 @@ static Metadata *mapUniquedNode(const UniquableMDNode *Node,
}
// At least one operand needs remapping.
Metadata *NewMD = cloneMDNode(Node, VM, Flags, TypeMapper, Materializer,
/* IsDistinct */ false);
Metadata *NewMD =
cloneMDNode(Node, Cycles, VM, Flags, TypeMapper, Materializer,
/* IsDistinct */ false);
Dummy->replaceAllUsesWith(NewMD);
MDNode::deleteTemporary(Dummy);
return mapToMetadata(VM, Node, NewMD);
}
static Metadata *MapMetadataImpl(const Metadata *MD, ValueToValueMapTy &VM,
RemapFlags Flags,
static Metadata *MapMetadataImpl(const Metadata *MD,
SmallVectorImpl<UniquableMDNode *> &Cycles,
ValueToValueMapTy &VM, RemapFlags Flags,
ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
// If the value already exists in the map, use it.
@ -345,18 +373,30 @@ static Metadata *MapMetadataImpl(const Metadata *MD, ValueToValueMapTy &VM,
return mapToSelf(VM, MD);
if (Node->isDistinct())
return mapDistinctNode(Node, VM, Flags, TypeMapper, Materializer);
return mapDistinctNode(Node, Cycles, VM, Flags, TypeMapper, Materializer);
return mapUniquedNode(Node, VM, Flags, TypeMapper, Materializer);
return mapUniquedNode(Node, Cycles, VM, Flags, TypeMapper, Materializer);
}
Metadata *llvm::MapMetadata(const Metadata *MD, ValueToValueMapTy &VM,
RemapFlags Flags, ValueMapTypeRemapper *TypeMapper,
ValueMaterializer *Materializer) {
Metadata *NewMD = MapMetadataImpl(MD, VM, Flags, TypeMapper, Materializer);
if (NewMD && NewMD != MD)
SmallVector<UniquableMDNode *, 8> Cycles;
Metadata *NewMD =
MapMetadataImpl(MD, Cycles, VM, Flags, TypeMapper, Materializer);
// Resolve cycles underneath MD.
if (NewMD && NewMD != MD) {
if (auto *N = dyn_cast<UniquableMDNode>(NewMD))
N->resolveCycles();
for (UniquableMDNode *N : Cycles)
N->resolveCycles();
} else {
// Shouldn't get unresolved cycles if nothing was remapped.
assert(Cycles.empty() && "Expected no unresolved cycles");
}
return NewMD;
}


@ -75,6 +75,18 @@ static const unsigned MinVecRegSize = 128;
static const unsigned RecursionMaxDepth = 12;
/// \brief Predicate for the element types that the SLP vectorizer supports.
///
/// The most important things to filter here are types which are invalid in LLVM
/// vectors. We also filter target specific types which have absolutely no
/// meaningful vectorization path such as x86_fp80 and ppc_f128. This just
/// avoids spending time checking the cost model and realizing that they will
/// be inevitably scalarized.
static bool isValidElementType(Type *Ty) {
return VectorType::isValidElementType(Ty) && !Ty->isX86_FP80Ty() &&
!Ty->isPPC_FP128Ty();
}
/// \returns the parent basic block if all of the instructions in \p VL
/// are in the same block or null otherwise.
static BasicBlock *getSameBlock(ArrayRef<Value *> VL) {
@ -208,6 +220,8 @@ static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) {
MD = MDNode::getMostGenericTBAA(MD, IMD);
break;
case LLVMContext::MD_alias_scope:
MD = MDNode::getMostGenericAliasScope(MD, IMD);
break;
case LLVMContext::MD_noalias:
MD = MDNode::intersect(MD, IMD);
break;
@ -1214,7 +1228,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
Type *SrcTy = VL0->getOperand(0)->getType();
for (unsigned i = 0; i < VL.size(); ++i) {
Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
if (Ty != SrcTy || Ty->isAggregateType() || Ty->isVectorTy()) {
if (Ty != SrcTy || !isValidElementType(Ty)) {
BS.cancelScheduling(VL);
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n");
@ -3128,7 +3142,7 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
// Check that the pointer points to scalars.
Type *Ty = SI->getValueOperand()->getType();
if (Ty->isAggregateType() || Ty->isVectorTy())
if (!isValidElementType(Ty))
continue;
// Find the base pointer.
@ -3169,7 +3183,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
for (int i = 0, e = VL.size(); i < e; ++i) {
Type *Ty = VL[i]->getType();
if (Ty->isAggregateType() || Ty->isVectorTy())
if (!isValidElementType(Ty))
return false;
Instruction *Inst = dyn_cast<Instruction>(VL[i]);
if (!Inst || Inst->getOpcode() != Opcode0)
@ -3389,7 +3403,7 @@ public:
return false;
Type *Ty = B->getType();
if (Ty->isVectorTy())
if (!isValidElementType(Ty))
return false;
ReductionOpcode = B->getOpcode();

Binary file not shown.


@ -0,0 +1,3 @@
; RUN: not llvm-c-test --module-dump < %S/Inputs/invalid.ll.bc 2>&1 | FileCheck %s
CHECK: Error parsing bitcode: Unknown attribute kind (48)


@ -0,0 +1,11 @@
; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
define void @test_mismatched_setcc(<4 x i22> %l, <4 x i22> %r, <4 x i1>* %addr) {
; CHECK-LABEL: test_mismatched_setcc:
; CHECK: cmeq [[CMP128:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
; CHECK: xtn {{v[0-9]+}}.4h, [[CMP128]].4s
%tst = icmp eq <4 x i22> %l, %r
store <4 x i1> %tst, <4 x i1>* %addr
ret void
}


@ -10,6 +10,6 @@ entry:
ret void
}
; CHECK: .section .rdata,"rd"
; CHECK: .section .rdata,"dr"
; CHECK-NOT: .section ".rodata.str1.1"


@ -7,6 +7,6 @@ entry:
ret void
}
; CHECK: .section .CRT$XCU,"rd"
; CHECK: .section .CRT$XCU,"dr"
; CHECK: .long function


@ -9,8 +9,8 @@
define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" {
entry:
; NO-REALIGN-LABEL: test1
; NO-REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]]
; NO-REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1:[0-9]+]]:128]
; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16
; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
@ -21,14 +21,16 @@ entry:
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]!
; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0:0]], #48
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0]], #32
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]!
; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]
%retval = alloca <16 x float>, align 16
%0 = load <16 x float>* @T3_retval, align 16
@ -42,8 +44,8 @@ define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp {
entry:
; REALIGN-LABEL: test2
; REALIGN: bfc sp, #0, #6
; REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]]
; REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1:[0-9]+]]:128]
; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16
; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
@ -63,7 +65,8 @@ entry:
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; REALIGN: add r[[R1:[0-9]+]], r[[R0]], #32
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]!
; REALIGN: add r[[R1:[0-9]+]], r[[R0]], #16
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]
%retval = alloca <16 x float>, align 16
%0 = load <16 x float>* @T3_retval, align 16


@ -46,8 +46,10 @@ entry:
; CHECK: movw [[REG2:r[0-9]+]], #16716
; CHECK: movt [[REG2:r[0-9]+]], #72
; CHECK: str [[REG2]], [r0, #32]
; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]!
; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
; CHECK: adds r0, #16
; CHECK: adds r1, #16
; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false)
@ -57,8 +59,10 @@ entry:
define void @t3(i8* nocapture %C) nounwind {
entry:
; CHECK-LABEL: t3:
; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]!
; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
; CHECK: adds r0, #16
; CHECK: adds r1, #16
; CHECK: vld1.8 {d{{[0-9]+}}}, [r1]
; CHECK: vst1.8 {d{{[0-9]+}}}, [r0]
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false)
@ -69,8 +73,7 @@ define void @t4(i8* nocapture %C) nounwind {
entry:
; CHECK-LABEL: t4:
; CHECK: vld1.8 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1]
; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]!
; CHECK: strh [[REG5:r[0-9]+]], [r0]
; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)
ret void
}


@ -0,0 +1,11 @@
; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s
define void @test_mismatched_setcc(<4 x i22> %l, <4 x i22> %r, <4 x i1>* %addr) {
; CHECK-LABEL: test_mismatched_setcc:
; CHECK: vceq.i32 [[CMP128:q[0-9]+]], {{q[0-9]+}}, {{q[0-9]+}}
; CHECK: vmovn.i32 {{d[0-9]+}}, [[CMP128]]
%tst = icmp eq <4 x i22> %l, %r
store <4 x i1> %tst, <4 x i1>* %addr
ret void
}


@ -88,6 +88,19 @@ if.end11: ; preds = %num2long.exit
ret i32 23
}
; When considering the producer of cmp's src as the subsuming instruction,
; only consider that when the comparison is to 0.
define i32 @cmp_src_nonzero(i32 %a, i32 %b, i32 %x, i32 %y) {
entry:
; CHECK-LABEL: cmp_src_nonzero:
; CHECK: sub
; CHECK: cmp
%sub = sub i32 %a, %b
%cmp = icmp eq i32 %sub, 17
%ret = select i1 %cmp, i32 %x, i32 %y
ret i32 %ret
}
define float @float_sel(i32 %a, i32 %b, float %x, float %y) {
entry:
; CHECK-LABEL: float_sel:
@ -144,3 +157,50 @@ entry:
store i32 %sub, i32* @t
ret double %ret
}
declare void @abort()
declare void @exit(i32)
; If the comparison uses the V bit (signed overflow/underflow), we can't
; omit the comparison.
define i32 @cmp_slt0(i32 %a, i32 %b, i32 %x, i32 %y) {
entry:
; CHECK-LABEL: cmp_slt0
; CHECK: sub
; CHECK: cmp
; CHECK: bge
%load = load i32* @t, align 4
%sub = sub i32 %load, 17
%cmp = icmp slt i32 %sub, 0
br i1 %cmp, label %if.then, label %if.else
if.then:
call void @abort()
unreachable
if.else:
call void @exit(i32 0)
unreachable
}
; Same for the C bit. (Note the ult X, 0 is trivially
; false, so the DAG combiner may or may not optimize it).
define i32 @cmp_ult0(i32 %a, i32 %b, i32 %x, i32 %y) {
entry:
; CHECK-LABEL: cmp_ult0
; CHECK: sub
; CHECK: cmp
; CHECK: bhs
%load = load i32* @t, align 4
%sub = sub i32 %load, 17
%cmp = icmp ult i32 %sub, 0
br i1 %cmp, label %if.then, label %if.else
if.then:
call void @abort()
unreachable
if.else:
call void @exit(i32 0)
unreachable
}


@ -1,253 +0,0 @@
; RUN: llc < %s | FileCheck %s
target datalayout = "e-m:o-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
target triple = "thumbv7s-apple-ios8.0.0"
define <8 x i8> @load_v8i8(<8 x i8>** %ptr) {
;CHECK-LABEL: load_v8i8:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
%A = load <8 x i8>** %ptr
%lA = load <8 x i8>* %A, align 1
ret <8 x i8> %lA
}
define <8 x i8> @load_v8i8_update(<8 x i8>** %ptr) {
;CHECK-LABEL: load_v8i8_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <8 x i8>** %ptr
%lA = load <8 x i8>* %A, align 1
%inc = getelementptr <8 x i8>* %A, i38 1
store <8 x i8>* %inc, <8 x i8>** %ptr
ret <8 x i8> %lA
}
define <4 x i16> @load_v4i16(<4 x i16>** %ptr) {
;CHECK-LABEL: load_v4i16:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
%A = load <4 x i16>** %ptr
%lA = load <4 x i16>* %A, align 1
ret <4 x i16> %lA
}
define <4 x i16> @load_v4i16_update(<4 x i16>** %ptr) {
;CHECK-LABEL: load_v4i16_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <4 x i16>** %ptr
%lA = load <4 x i16>* %A, align 1
%inc = getelementptr <4 x i16>* %A, i34 1
store <4 x i16>* %inc, <4 x i16>** %ptr
ret <4 x i16> %lA
}
define <2 x i32> @load_v2i32(<2 x i32>** %ptr) {
;CHECK-LABEL: load_v2i32:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
%A = load <2 x i32>** %ptr
%lA = load <2 x i32>* %A, align 1
ret <2 x i32> %lA
}
define <2 x i32> @load_v2i32_update(<2 x i32>** %ptr) {
;CHECK-LABEL: load_v2i32_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <2 x i32>** %ptr
%lA = load <2 x i32>* %A, align 1
%inc = getelementptr <2 x i32>* %A, i32 1
store <2 x i32>* %inc, <2 x i32>** %ptr
ret <2 x i32> %lA
}
define <2 x float> @load_v2f32(<2 x float>** %ptr) {
;CHECK-LABEL: load_v2f32:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
%A = load <2 x float>** %ptr
%lA = load <2 x float>* %A, align 1
ret <2 x float> %lA
}
define <2 x float> @load_v2f32_update(<2 x float>** %ptr) {
;CHECK-LABEL: load_v2f32_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <2 x float>** %ptr
%lA = load <2 x float>* %A, align 1
%inc = getelementptr <2 x float>* %A, i32 1
store <2 x float>* %inc, <2 x float>** %ptr
ret <2 x float> %lA
}
define <1 x i64> @load_v1i64(<1 x i64>** %ptr) {
;CHECK-LABEL: load_v1i64:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
%A = load <1 x i64>** %ptr
%lA = load <1 x i64>* %A, align 1
ret <1 x i64> %lA
}
define <1 x i64> @load_v1i64_update(<1 x i64>** %ptr) {
;CHECK-LABEL: load_v1i64_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <1 x i64>** %ptr
%lA = load <1 x i64>* %A, align 1
%inc = getelementptr <1 x i64>* %A, i31 1
store <1 x i64>* %inc, <1 x i64>** %ptr
ret <1 x i64> %lA
}
define <16 x i8> @load_v16i8(<16 x i8>** %ptr) {
;CHECK-LABEL: load_v16i8:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
%A = load <16 x i8>** %ptr
%lA = load <16 x i8>* %A, align 1
ret <16 x i8> %lA
}
define <16 x i8> @load_v16i8_update(<16 x i8>** %ptr) {
;CHECK-LABEL: load_v16i8_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <16 x i8>** %ptr
%lA = load <16 x i8>* %A, align 1
%inc = getelementptr <16 x i8>* %A, i316 1
store <16 x i8>* %inc, <16 x i8>** %ptr
ret <16 x i8> %lA
}
define <8 x i16> @load_v8i16(<8 x i16>** %ptr) {
;CHECK-LABEL: load_v8i16:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
%A = load <8 x i16>** %ptr
%lA = load <8 x i16>* %A, align 1
ret <8 x i16> %lA
}
define <8 x i16> @load_v8i16_update(<8 x i16>** %ptr) {
;CHECK-LABEL: load_v8i16_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <8 x i16>** %ptr
%lA = load <8 x i16>* %A, align 1
%inc = getelementptr <8 x i16>* %A, i38 1
store <8 x i16>* %inc, <8 x i16>** %ptr
ret <8 x i16> %lA
}
define <4 x i32> @load_v4i32(<4 x i32>** %ptr) {
;CHECK-LABEL: load_v4i32:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
%A = load <4 x i32>** %ptr
%lA = load <4 x i32>* %A, align 1
ret <4 x i32> %lA
}
define <4 x i32> @load_v4i32_update(<4 x i32>** %ptr) {
;CHECK-LABEL: load_v4i32_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <4 x i32>** %ptr
%lA = load <4 x i32>* %A, align 1
%inc = getelementptr <4 x i32>* %A, i34 1
store <4 x i32>* %inc, <4 x i32>** %ptr
ret <4 x i32> %lA
}
define <4 x float> @load_v4f32(<4 x float>** %ptr) {
;CHECK-LABEL: load_v4f32:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
%A = load <4 x float>** %ptr
%lA = load <4 x float>* %A, align 1
ret <4 x float> %lA
}
define <4 x float> @load_v4f32_update(<4 x float>** %ptr) {
;CHECK-LABEL: load_v4f32_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <4 x float>** %ptr
%lA = load <4 x float>* %A, align 1
%inc = getelementptr <4 x float>* %A, i34 1
store <4 x float>* %inc, <4 x float>** %ptr
ret <4 x float> %lA
}
define <2 x i64> @load_v2i64(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
%A = load <2 x i64>** %ptr
%lA = load <2 x i64>* %A, align 1
ret <2 x i64> %lA
}
define <2 x i64> @load_v2i64_update(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <2 x i64>** %ptr
%lA = load <2 x i64>* %A, align 1
%inc = getelementptr <2 x i64>* %A, i32 1
store <2 x i64>* %inc, <2 x i64>** %ptr
ret <2 x i64> %lA
}
; Make sure we change the type to match alignment if necessary.
define <2 x i64> @load_v2i64_update_aligned2(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update_aligned2:
;CHECK: vld1.16 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <2 x i64>** %ptr
%lA = load <2 x i64>* %A, align 2
%inc = getelementptr <2 x i64>* %A, i32 1
store <2 x i64>* %inc, <2 x i64>** %ptr
ret <2 x i64> %lA
}
define <2 x i64> @load_v2i64_update_aligned4(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update_aligned4:
;CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <2 x i64>** %ptr
%lA = load <2 x i64>* %A, align 4
%inc = getelementptr <2 x i64>* %A, i32 1
store <2 x i64>* %inc, <2 x i64>** %ptr
ret <2 x i64> %lA
}
define <2 x i64> @load_v2i64_update_aligned8(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update_aligned8:
;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:64]!
%A = load <2 x i64>** %ptr
%lA = load <2 x i64>* %A, align 8
%inc = getelementptr <2 x i64>* %A, i32 1
store <2 x i64>* %inc, <2 x i64>** %ptr
ret <2 x i64> %lA
}
define <2 x i64> @load_v2i64_update_aligned16(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update_aligned16:
;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:128]!
%A = load <2 x i64>** %ptr
%lA = load <2 x i64>* %A, align 16
%inc = getelementptr <2 x i64>* %A, i32 1
store <2 x i64>* %inc, <2 x i64>** %ptr
ret <2 x i64> %lA
}
; Make sure we don't break smaller-than-dreg extloads.
define <4 x i32> @zextload_v8i8tov8i32(<4 x i8>** %ptr) {
;CHECK-LABEL: zextload_v8i8tov8i32:
;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [{{r[0-9]+}}:32]
;CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
;CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
%A = load <4 x i8>** %ptr
%lA = load <4 x i8>* %A, align 4
%zlA = zext <4 x i8> %lA to <4 x i32>
ret <4 x i32> %zlA
}
define <4 x i32> @zextload_v8i8tov8i32_fake_update(<4 x i8>** %ptr) {
;CHECK-LABEL: zextload_v8i8tov8i32_fake_update:
;CHECK: ldr.w r[[PTRREG:[0-9]+]], [r0]
;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [r[[PTRREG]]:32]
;CHECK: add.w r[[INCREG:[0-9]+]], r[[PTRREG]], #16
;CHECK: str.w r[[INCREG]], [r0]
;CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
;CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
%A = load <4 x i8>** %ptr
%lA = load <4 x i8>* %A, align 4
%inc = getelementptr <4 x i8>* %A, i38 4
store <4 x i8>* %inc, <4 x i8>** %ptr
%zlA = zext <4 x i8> %lA to <4 x i32>
ret <4 x i32> %zlA
}


@ -1,258 +0,0 @@
; RUN: llc < %s | FileCheck %s
target datalayout = "e-m:o-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
target triple = "thumbv7s-apple-ios8.0.0"
define void @store_v8i8(<8 x i8>** %ptr, <8 x i8> %val) {
;CHECK-LABEL: store_v8i8:
;CHECK: str r1, [r0]
%A = load <8 x i8>** %ptr
store <8 x i8> %val, <8 x i8>* %A, align 1
ret void
}
define void @store_v8i8_update(<8 x i8>** %ptr, <8 x i8> %val) {
;CHECK-LABEL: store_v8i8_update:
;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <8 x i8>** %ptr
store <8 x i8> %val, <8 x i8>* %A, align 1
%inc = getelementptr <8 x i8>* %A, i38 1
store <8 x i8>* %inc, <8 x i8>** %ptr
ret void
}
define void @store_v4i16(<4 x i16>** %ptr, <4 x i16> %val) {
;CHECK-LABEL: store_v4i16:
;CHECK: str r1, [r0]
%A = load <4 x i16>** %ptr
store <4 x i16> %val, <4 x i16>* %A, align 1
ret void
}
define void @store_v4i16_update(<4 x i16>** %ptr, <4 x i16> %val) {
;CHECK-LABEL: store_v4i16_update:
;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <4 x i16>** %ptr
store <4 x i16> %val, <4 x i16>* %A, align 1
%inc = getelementptr <4 x i16>* %A, i34 1
store <4 x i16>* %inc, <4 x i16>** %ptr
ret void
}
define void @store_v2i32(<2 x i32>** %ptr, <2 x i32> %val) {
;CHECK-LABEL: store_v2i32:
;CHECK: str r1, [r0]
%A = load <2 x i32>** %ptr
store <2 x i32> %val, <2 x i32>* %A, align 1
ret void
}
define void @store_v2i32_update(<2 x i32>** %ptr, <2 x i32> %val) {
;CHECK-LABEL: store_v2i32_update:
;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <2 x i32>** %ptr
store <2 x i32> %val, <2 x i32>* %A, align 1
%inc = getelementptr <2 x i32>* %A, i32 1
store <2 x i32>* %inc, <2 x i32>** %ptr
ret void
}
define void @store_v2f32(<2 x float>** %ptr, <2 x float> %val) {
;CHECK-LABEL: store_v2f32:
;CHECK: str r1, [r0]
%A = load <2 x float>** %ptr
store <2 x float> %val, <2 x float>* %A, align 1
ret void
}
define void @store_v2f32_update(<2 x float>** %ptr, <2 x float> %val) {
;CHECK-LABEL: store_v2f32_update:
;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <2 x float>** %ptr
store <2 x float> %val, <2 x float>* %A, align 1
%inc = getelementptr <2 x float>* %A, i32 1
store <2 x float>* %inc, <2 x float>** %ptr
ret void
}
define void @store_v1i64(<1 x i64>** %ptr, <1 x i64> %val) {
;CHECK-LABEL: store_v1i64:
;CHECK: str r1, [r0]
%A = load <1 x i64>** %ptr
store <1 x i64> %val, <1 x i64>* %A, align 1
ret void
}
define void @store_v1i64_update(<1 x i64>** %ptr, <1 x i64> %val) {
;CHECK-LABEL: store_v1i64_update:
;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <1 x i64>** %ptr
store <1 x i64> %val, <1 x i64>* %A, align 1
%inc = getelementptr <1 x i64>* %A, i31 1
store <1 x i64>* %inc, <1 x i64>** %ptr
ret void
}
define void @store_v16i8(<16 x i8>** %ptr, <16 x i8> %val) {
;CHECK-LABEL: store_v16i8:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
%A = load <16 x i8>** %ptr
store <16 x i8> %val, <16 x i8>* %A, align 1
ret void
}
define void @store_v16i8_update(<16 x i8>** %ptr, <16 x i8> %val) {
;CHECK-LABEL: store_v16i8_update:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <16 x i8>** %ptr
store <16 x i8> %val, <16 x i8>* %A, align 1
%inc = getelementptr <16 x i8>* %A, i316 1
store <16 x i8>* %inc, <16 x i8>** %ptr
ret void
}
define void @store_v8i16(<8 x i16>** %ptr, <8 x i16> %val) {
;CHECK-LABEL: store_v8i16:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
%A = load <8 x i16>** %ptr
store <8 x i16> %val, <8 x i16>* %A, align 1
ret void
}
define void @store_v8i16_update(<8 x i16>** %ptr, <8 x i16> %val) {
;CHECK-LABEL: store_v8i16_update:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <8 x i16>** %ptr
store <8 x i16> %val, <8 x i16>* %A, align 1
%inc = getelementptr <8 x i16>* %A, i38 1
store <8 x i16>* %inc, <8 x i16>** %ptr
ret void
}
define void @store_v4i32(<4 x i32>** %ptr, <4 x i32> %val) {
;CHECK-LABEL: store_v4i32:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
%A = load <4 x i32>** %ptr
store <4 x i32> %val, <4 x i32>* %A, align 1
ret void
}
define void @store_v4i32_update(<4 x i32>** %ptr, <4 x i32> %val) {
;CHECK-LABEL: store_v4i32_update:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <4 x i32>** %ptr
store <4 x i32> %val, <4 x i32>* %A, align 1
%inc = getelementptr <4 x i32>* %A, i34 1
store <4 x i32>* %inc, <4 x i32>** %ptr
ret void
}
define void @store_v4f32(<4 x float>** %ptr, <4 x float> %val) {
;CHECK-LABEL: store_v4f32:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
%A = load <4 x float>** %ptr
store <4 x float> %val, <4 x float>* %A, align 1
ret void
}
define void @store_v4f32_update(<4 x float>** %ptr, <4 x float> %val) {
;CHECK-LABEL: store_v4f32_update:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <4 x float>** %ptr
store <4 x float> %val, <4 x float>* %A, align 1
%inc = getelementptr <4 x float>* %A, i34 1
store <4 x float>* %inc, <4 x float>** %ptr
ret void
}
define void @store_v2i64(<2 x i64>** %ptr, <2 x i64> %val) {
;CHECK-LABEL: store_v2i64:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
%A = load <2 x i64>** %ptr
store <2 x i64> %val, <2 x i64>* %A, align 1
ret void
}
define void @store_v2i64_update(<2 x i64>** %ptr, <2 x i64> %val) {
;CHECK-LABEL: store_v2i64_update:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <2 x i64>** %ptr
store <2 x i64> %val, <2 x i64>* %A, align 1
%inc = getelementptr <2 x i64>* %A, i32 1
store <2 x i64>* %inc, <2 x i64>** %ptr
ret void
}
define void @store_v2i64_update_aligned2(<2 x i64>** %ptr, <2 x i64> %val) {
;CHECK-LABEL: store_v2i64_update_aligned2:
;CHECK: vst1.16 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <2 x i64>** %ptr
store <2 x i64> %val, <2 x i64>* %A, align 2
%inc = getelementptr <2 x i64>* %A, i32 1
store <2 x i64>* %inc, <2 x i64>** %ptr
ret void
}
define void @store_v2i64_update_aligned4(<2 x i64>** %ptr, <2 x i64> %val) {
;CHECK-LABEL: store_v2i64_update_aligned4:
;CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
%A = load <2 x i64>** %ptr
store <2 x i64> %val, <2 x i64>* %A, align 4
%inc = getelementptr <2 x i64>* %A, i32 1
store <2 x i64>* %inc, <2 x i64>** %ptr
ret void
}
define void @store_v2i64_update_aligned8(<2 x i64>** %ptr, <2 x i64> %val) {
;CHECK-LABEL: store_v2i64_update_aligned8:
;CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:64]!
%A = load <2 x i64>** %ptr
store <2 x i64> %val, <2 x i64>* %A, align 8
%inc = getelementptr <2 x i64>* %A, i32 1
store <2 x i64>* %inc, <2 x i64>** %ptr
ret void
}
define void @store_v2i64_update_aligned16(<2 x i64>** %ptr, <2 x i64> %val) {
;CHECK-LABEL: store_v2i64_update_aligned16:
;CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:128]!
%A = load <2 x i64>** %ptr
store <2 x i64> %val, <2 x i64>* %A, align 16
%inc = getelementptr <2 x i64>* %A, i32 1
store <2 x i64>* %inc, <2 x i64>** %ptr
ret void
}
define void @truncstore_v4i32tov4i8(<4 x i8>** %ptr, <4 x i32> %val) {
;CHECK-LABEL: truncstore_v4i32tov4i8:
;CHECK: ldr.w r9, [sp]
;CHECK: vmov {{d[0-9]+}}, r3, r9
;CHECK: vmov {{d[0-9]+}}, r1, r2
;CHECK: vmovn.i32 [[VECLO:d[0-9]+]], {{q[0-9]+}}
;CHECK: vuzp.8 [[VECLO]], {{d[0-9]+}}
;CHECK: ldr r[[PTRREG:[0-9]+]], [r0]
;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32]
%A = load <4 x i8>** %ptr
%trunc = trunc <4 x i32> %val to <4 x i8>
store <4 x i8> %trunc, <4 x i8>* %A, align 4
ret void
}
define void @truncstore_v4i32tov4i8_fake_update(<4 x i8>** %ptr, <4 x i32> %val) {
;CHECK-LABEL: truncstore_v4i32tov4i8_fake_update:
;CHECK: ldr.w r9, [sp]
;CHECK: vmov {{d[0-9]+}}, r3, r9
;CHECK: vmov {{d[0-9]+}}, r1, r2
;CHECK: movs [[IMM16:r[0-9]+]], #16
;CHECK: vmovn.i32 [[VECLO:d[0-9]+]], {{q[0-9]+}}
;CHECK: vuzp.8 [[VECLO]], {{d[0-9]+}}
;CHECK: ldr r[[PTRREG:[0-9]+]], [r0]
;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32], [[IMM16]]
;CHECK: str r[[PTRREG]], [r0]
%A = load <4 x i8>** %ptr
%trunc = trunc <4 x i32> %val to <4 x i8>
store <4 x i8> %trunc, <4 x i8>* %A, align 4
%inc = getelementptr <4 x i8>* %A, i38 4
store <4 x i8>* %inc, <4 x i8>** %ptr
ret void
}


@ -0,0 +1,23 @@
; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
; Function Attrs: nounwind
define void @Compute_Lateral() #0 {
entry:
br i1 undef, label %if.then, label %if.end
if.then: ; preds = %entry
unreachable
if.end: ; preds = %entry
%0 = select i1 undef, <2 x double> undef, <2 x double> zeroinitializer
%1 = extractelement <2 x double> %0, i32 1
store double %1, double* undef, align 8
ret void
; CHECK-LABEL: @Compute_Lateral
}
attributes #0 = { nounwind }


@ -0,0 +1,34 @@
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
; This tests that the llvm.SI.end.cf intrinsic is not inserted into the
; loop block. This intrinsic will be lowered to s_or_b64 by the code
; generator.
; CHECK-LABEL: {{^}}test:
; This was lowered from the llvm.SI.end.cf intrinsic:
; CHECK: s_or_b64 exec, exec
; CHECK: [[LOOP_LABEL:[0-9A-Za-z_]+]]: ; %loop{{$}}
; CHECK-NOT: s_or_b64 exec, exec
; CHECK: s_cbranch_execnz [[LOOP_LABEL]]
define void @test(i32 addrspace(1)* %out, i32 %cond) {
entry:
%tmp0 = icmp eq i32 %cond, 0
br i1 %tmp0, label %if, label %loop
if:
store i32 0, i32 addrspace(1)* %out
br label %loop
loop:
%tmp1 = phi i32 [0, %entry], [0, %if], [%inc, %loop]
%inc = add i32 %tmp1, %cond
%tmp2 = icmp ugt i32 %inc, 10
br i1 %tmp2, label %done, label %loop
done:
%tmp3 = getelementptr i32 addrspace(1)* %out, i64 1
store i32 %inc, i32 addrspace(1)* %tmp3
ret void
}


@ -0,0 +1,58 @@
; RUN: opt -loop-unroll -S -mtriple=amdgcn-- -mcpu=SI %s | FileCheck %s
; This IR comes from this OpenCL C code:
;
; if (b + 4 > a) {
; for (int i = 0; i < 4; i++, b++) {
; if (b + 1 <= a)
; *(dst + c + b) = 0;
; else
; break;
; }
; }
;
; This test is meant to check that this loop isn't unrolled into more than
; four iterations. The loop unrolling preferences we currently use cause this
; loop to not be unrolled at all, but that may change in the future.
; CHECK-LABEL: @test
; CHECK: store i8 0, i8 addrspace(1)*
; CHECK-NOT: store i8 0, i8 addrspace(1)*
; CHECK: ret void
define void @test(i8 addrspace(1)* nocapture %dst, i32 %a, i32 %b, i32 %c) {
entry:
%add = add nsw i32 %b, 4
%cmp = icmp sgt i32 %add, %a
br i1 %cmp, label %for.cond.preheader, label %if.end7
for.cond.preheader: ; preds = %entry
%cmp313 = icmp slt i32 %b, %a
br i1 %cmp313, label %if.then4.lr.ph, label %if.end7.loopexit
if.then4.lr.ph: ; preds = %for.cond.preheader
%0 = sext i32 %c to i64
br label %if.then4
if.then4: ; preds = %if.then4.lr.ph, %if.then4
%i.015 = phi i32 [ 0, %if.then4.lr.ph ], [ %inc, %if.then4 ]
%b.addr.014 = phi i32 [ %b, %if.then4.lr.ph ], [ %add2, %if.then4 ]
%add2 = add nsw i32 %b.addr.014, 1
%1 = sext i32 %b.addr.014 to i64
%add.ptr.sum = add nsw i64 %1, %0
%add.ptr5 = getelementptr inbounds i8 addrspace(1)* %dst, i64 %add.ptr.sum
store i8 0, i8 addrspace(1)* %add.ptr5, align 1
%inc = add nsw i32 %i.015, 1
%cmp1 = icmp slt i32 %inc, 4
%cmp3 = icmp slt i32 %add2, %a
%or.cond = and i1 %cmp3, %cmp1
br i1 %or.cond, label %if.then4, label %for.cond.if.end7.loopexit_crit_edge
for.cond.if.end7.loopexit_crit_edge: ; preds = %if.then4
br label %if.end7.loopexit
if.end7.loopexit: ; preds = %for.cond.if.end7.loopexit_crit_edge, %for.cond.preheader
br label %if.end7
if.end7: ; preds = %if.end7.loopexit, %entry
ret void
}


@ -73,20 +73,20 @@ $vftable = comdat largest
; CHECK: .globl @v8@0
; CHECK: .section .text,"xr",discard,@f8@0
; CHECK: .globl @f8@0
; CHECK: .section .bss,"wb",associative,_f1
; CHECK: .section .bss,"bw",associative,_f1
; CHECK: .globl _v1
; CHECK: .section .bss,"wb",associative,_f2
; CHECK: .section .bss,"bw",associative,_f2
; CHECK: .globl _v2
; CHECK: .section .bss,"wb",associative,_f3
; CHECK: .section .bss,"bw",associative,_f3
; CHECK: .globl _v3
; CHECK: .section .bss,"wb",associative,_f4
; CHECK: .section .bss,"bw",associative,_f4
; CHECK: .globl _v4
; CHECK: .section .bss,"wb",associative,_f5
; CHECK: .section .bss,"bw",associative,_f5
; CHECK: .globl _v5
; CHECK: .section .bss,"wb",associative,_f6
; CHECK: .section .bss,"bw",associative,_f6
; CHECK: .globl _v6
; CHECK: .section .bss,"wb",same_size,_f6
; CHECK: .section .bss,"bw",same_size,_f6
; CHECK: .globl _f6
; CHECK: .section .rdata,"rd",largest,_vftable
; CHECK: .section .rdata,"dr",largest,_vftable
; CHECK: .globl _vftable
; CHECK: _vftable = L_some_name+4


@ -0,0 +1,35 @@
; RUN: llc < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
define void @PR22524({ float, float }* %arg) {
; Check that we can materialize the zero constants we store in two places here,
; and at least form a legal store of the floating point value at the end.
; The DAG combiner at one point contained bugs that given enough permutations
; would incorrectly form an illegal operation for the last of these stores when
; it folded it to a zero too late to legalize the zero store operation. If this
; ever starts forming a zero store instead of movss, the test case has stopped
; being useful.
;
; CHECK-LABEL: PR22524:
entry:
%0 = getelementptr inbounds { float, float }* %arg, i32 0, i32 1
store float 0.000000e+00, float* %0, align 4
; CHECK: movl $0, 4(%rdi)
%1 = getelementptr inbounds { float, float }* %arg, i64 0, i32 0
%2 = bitcast float* %1 to i64*
%3 = load i64* %2, align 8
%4 = trunc i64 %3 to i32
%5 = lshr i64 %3, 32
%6 = trunc i64 %5 to i32
%7 = bitcast i32 %6 to float
%8 = fmul float %7, 0.000000e+00
%9 = bitcast float* %1 to i32*
store i32 %6, i32* %9, align 4
; CHECK: movl $0, (%rdi)
store float %8, float* %0, align 4
; CHECK: movss %{{.*}}, 4(%rdi)
ret void
}


@ -40,18 +40,18 @@ define weak_odr dllexport void @weak1() {
; CHECK: .globl Var1
@Var1 = dllexport global i32 1, align 4
; CHECK: .rdata,"rd"
; CHECK: .rdata,"dr"
; CHECK: .globl Var2
@Var2 = dllexport unnamed_addr constant i32 1
; CHECK: .comm Var3
@Var3 = common dllexport global i32 0, align 4
; CHECK: .section .data,"wd",discard,WeakVar1
; CHECK: .section .data,"dw",discard,WeakVar1
; CHECK: .globl WeakVar1
@WeakVar1 = weak_odr dllexport global i32 1, align 4
; CHECK: .section .rdata,"rd",discard,WeakVar2
; CHECK: .section .rdata,"dr",discard,WeakVar2
; CHECK: .globl WeakVar2
@WeakVar2 = weak_odr dllexport unnamed_addr constant i32 1


@ -21,6 +21,8 @@ define dllexport void @f2() unnamed_addr {
ret void
}
declare dllexport void @not_defined()
; CHECK: .globl _stdfun@0
define dllexport x86_stdcallcc void @stdfun() nounwind {
ret void
@ -59,18 +61,18 @@ define weak_odr dllexport void @weak1() {
; CHECK: .globl _Var1
@Var1 = dllexport global i32 1, align 4
; CHECK: .rdata,"rd"
; CHECK: .rdata,"dr"
; CHECK: .globl _Var2
@Var2 = dllexport unnamed_addr constant i32 1
; CHECK: .comm _Var3
@Var3 = common dllexport global i32 0, align 4
; CHECK: .section .data,"wd",discard,_WeakVar1
; CHECK: .section .data,"dw",discard,_WeakVar1
; CHECK: .globl _WeakVar1
@WeakVar1 = weak_odr dllexport global i32 1, align 4
; CHECK: .section .rdata,"rd",discard,_WeakVar2
; CHECK: .section .rdata,"dr",discard,_WeakVar2
; CHECK: .globl _WeakVar2
@WeakVar2 = weak_odr dllexport unnamed_addr constant i32 1
@ -91,7 +93,6 @@ define weak_odr dllexport void @weak1() {
; CHECK: _weak_alias = _f1
@weak_alias = weak_odr dllexport alias void()* @f1
; CHECK: .section .drectve
; CHECK-CL: " /EXPORT:_Var1,DATA"
; CHECK-CL: " /EXPORT:_Var2,DATA"
@ -100,6 +101,7 @@ define weak_odr dllexport void @weak1() {
; CHECK-CL: " /EXPORT:_WeakVar2,DATA"
; CHECK-CL: " /EXPORT:_f1"
; CHECK-CL: " /EXPORT:_f2"
; CHECK-CL-NOT: not_exported
; CHECK-CL: " /EXPORT:_stdfun@0"
; CHECK-CL: " /EXPORT:@fastfun@0"
; CHECK-CL: " /EXPORT:_thisfun"
@ -117,6 +119,7 @@ define weak_odr dllexport void @weak1() {
; CHECK-GCC: " -export:WeakVar2,data"
; CHECK-GCC: " -export:f1"
; CHECK-GCC: " -export:f2"
; CHECK-CL-NOT: not_exported
; CHECK-GCC: " -export:stdfun@0"
; CHECK-GCC: " -export:@fastfun@0"
; CHECK-GCC: " -export:thisfun"


@ -1,16 +1,31 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s
; Use CPU parameters to ensure that a CPU-specific attribute is not overriding the AVX definition.
;CHECK: @test
; No need to load from memory. The operand will be loaded as part of the AND instr.
;CHECK-NOT: vmovaps
;CHECK: vandps
;CHECK: ret
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-avx | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx -mattr=-avx | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -mattr=-avx | FileCheck %s --check-prefix=SSE
define void @test1(<8 x i32>* %p0, <8 x i32> %in1) nounwind {
entry:
%in0 = load <8 x i32>* %p0, align 2
%a = and <8 x i32> %in0, %in1
store <8 x i32> %a, <8 x i32>* undef
ret void
; No need to load unaligned operand from memory using an explicit instruction with AVX.
; The operand should be folded into the AND instr.
; With SSE, folding memory operands into math/logic ops requires 16-byte alignment
; unless specially configured on some CPUs such as AMD Family 10H.
define <4 x i32> @test1(<4 x i32>* %p0, <4 x i32> %in1) nounwind {
%in0 = load <4 x i32>* %p0, align 2
%a = and <4 x i32> %in0, %in1
ret <4 x i32> %a
; CHECK-LABEL: @test1
; CHECK-NOT: vmovups
; CHECK: vandps (%rdi), %xmm0, %xmm0
; CHECK-NEXT: ret
; SSE-LABEL: @test1
; SSE: movups (%rdi), %xmm1
; SSE-NEXT: andps %xmm1, %xmm0
; SSE-NEXT: ret
}


@ -48,7 +48,7 @@ define void @F1() {
; LINUX-SECTIONS: .section .rodata.G3,"a",@progbits
; LINUX-SECTIONS: .globl G3
; WIN32-SECTIONS: .section .rdata,"rd",one_only,_G3
; WIN32-SECTIONS: .section .rdata,"dr",one_only,_G3
; WIN32-SECTIONS: .globl _G3
@ -126,7 +126,7 @@ define void @F1() {
; LINUX-SECTIONS: .section .rodata.G7,"aMS",@progbits,1
; LINUX-SECTIONS: .globl G7
; WIN32-SECTIONS: .section .rdata,"rd",one_only,_G7
; WIN32-SECTIONS: .section .rdata,"dr",one_only,_G7
; WIN32-SECTIONS: .globl _G7
@ -189,7 +189,7 @@ define void @F1() {
; LINUX-SECTIONS: .asciz "foo"
; LINUX-SECTIONS: .size .LG14, 4
; WIN32-SECTIONS: .section .rdata,"rd"
; WIN32-SECTIONS: .section .rdata,"dr"
; WIN32-SECTIONS: L_G14:
; WIN32-SECTIONS: .asciz "foo"
@ -211,5 +211,5 @@ define void @F1() {
; LINUX-SECTIONS: .section .rodata.G15,"aM",@progbits,8
; LINUX-SECTIONS: G15:
; WIN32-SECTIONS: .section .rdata,"rd",one_only,_G15
; WIN32-SECTIONS: .section .rdata,"dr",one_only,_G15
; WIN32-SECTIONS: _G15:


@ -4,8 +4,7 @@ define <4 x i3> @test1(<4 x i3>* %in) nounwind {
%ret = load <4 x i3>* %in, align 1
ret <4 x i3> %ret
}
; CHECK: test1
; CHECK-LABEL: test1
; CHECK: movzwl
; CHECK: shrl $3
; CHECK: andl $7
@ -25,7 +24,7 @@ define <4 x i1> @test2(<4 x i1>* %in) nounwind {
ret <4 x i1> %ret
}
; CHECK: test2
; CHECK-LABEL: test2
; CHECK: movzbl
; CHECK: shrl
; CHECK: andl $1
@ -46,7 +45,7 @@ define <4 x i64> @test3(<4 x i1>* %in) nounwind {
ret <4 x i64> %sext
}
; CHECK: test3
; CHECK-LABEL: test3
; CHECK: movzbl
; CHECK: movq
; CHECK: shlq
@ -67,3 +66,71 @@ define <4 x i64> @test3(<4 x i1>* %in) nounwind {
; CHECK: vpunpcklqdq
; CHECK: vinsertf128
; CHECK: ret
define <16 x i4> @test4(<16 x i4>* %in) nounwind {
%ret = load <16 x i4>* %in, align 1
ret <16 x i4> %ret
}
; CHECK-LABEL: test4
; CHECK: movl
; CHECK-NEXT: shrl
; CHECK-NEXT: andl
; CHECK-NEXT: movl
; CHECK-NEXT: andl
; CHECK-NEXT: vmovd
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movl
; CHECK-NEXT: shrl
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movl
; CHECK-NEXT: shrl
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movl
; CHECK-NEXT: shrl
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movl
; CHECK-NEXT: shrl
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movl
; CHECK-NEXT: shrl
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movl
; CHECK-NEXT: shrl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movq
; CHECK-NEXT: shrq
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movq
; CHECK-NEXT: shrq
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movq
; CHECK-NEXT: shrq
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movq
; CHECK-NEXT: shrq
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movq
; CHECK-NEXT: shrq
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movq
; CHECK-NEXT: shrq
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: movq
; CHECK-NEXT: shrq
; CHECK-NEXT: andl
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: shrq
; CHECK-NEXT: vpinsrb
; CHECK-NEXT: retq


@ -37,4 +37,16 @@ define <16 x i8> @test4(<2 x i64>* %V) {
ret <16 x i8> %1
}
define <16 x i8> @test5() {
; CHECK-LABEL: test5
; CHECK: pshufb {{.*}}
store <2 x i64> <i64 1, i64 0>, <2 x i64>* undef, align 16
%l = load <2 x i64>* undef, align 16
%shuffle = shufflevector <2 x i64> %l, <2 x i64> undef, <2 x i32> zeroinitializer
store <2 x i64> %shuffle, <2 x i64>* undef, align 16
%1 = load <16 x i8>* undef, align 16
%2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> undef, <16 x i8> %1)
ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone


@ -1,175 +0,0 @@
; RUN: llc -mtriple x86_64-pc-windows-msvc < %s | FileCheck %s
define void @two_invoke_merged() {
entry:
invoke void @try_body()
to label %again unwind label %lpad
again:
invoke void @try_body()
to label %done unwind label %lpad
done:
ret void
lpad:
%vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
catch i8* bitcast (i32 (i8*, i8*)* @filt0 to i8*)
catch i8* bitcast (i32 (i8*, i8*)* @filt1 to i8*)
%sel = extractvalue { i8*, i32 } %vals, 1
call void @use_selector(i32 %sel)
ret void
}
; Normal path code
; CHECK-LABEL: {{^}}two_invoke_merged:
; CHECK: .seh_proc two_invoke_merged
; CHECK: .seh_handler __C_specific_handler, @unwind, @except
; CHECK: .Ltmp0:
; CHECK: callq try_body
; CHECK-NEXT: .Ltmp1:
; CHECK: .Ltmp2:
; CHECK: callq try_body
; CHECK-NEXT: .Ltmp3:
; CHECK: retq
; Landing pad code
; CHECK: .Ltmp5:
; CHECK: movl $1, %ecx
; CHECK: jmp
; CHECK: .Ltmp6:
; CHECK: movl $2, %ecx
; CHECK: callq use_selector
; CHECK: .seh_handlerdata
; CHECK-NEXT: .long 2
; CHECK-NEXT: .long .Ltmp0@IMGREL
; CHECK-NEXT: .long .Ltmp3@IMGREL+1
; CHECK-NEXT: .long filt0@IMGREL
; CHECK-NEXT: .long .Ltmp5@IMGREL
; CHECK-NEXT: .long .Ltmp0@IMGREL
; CHECK-NEXT: .long .Ltmp3@IMGREL+1
; CHECK-NEXT: .long filt1@IMGREL
; CHECK-NEXT: .long .Ltmp6@IMGREL
; CHECK: .text
; CHECK: .seh_endproc
define void @two_invoke_gap() {
entry:
invoke void @try_body()
to label %again unwind label %lpad
again:
call void @do_nothing_on_unwind()
invoke void @try_body()
to label %done unwind label %lpad
done:
ret void
lpad:
%vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
catch i8* bitcast (i32 (i8*, i8*)* @filt0 to i8*)
%sel = extractvalue { i8*, i32 } %vals, 1
call void @use_selector(i32 %sel)
ret void
}
; Normal path code
; CHECK-LABEL: {{^}}two_invoke_gap:
; CHECK: .seh_proc two_invoke_gap
; CHECK: .seh_handler __C_specific_handler, @unwind, @except
; CHECK: .Ltmp11:
; CHECK: callq try_body
; CHECK-NEXT: .Ltmp12:
; CHECK: callq do_nothing_on_unwind
; CHECK: .Ltmp13:
; CHECK: callq try_body
; CHECK-NEXT: .Ltmp14:
; CHECK: retq
; Landing pad code
; CHECK: .Ltmp16:
; CHECK: movl $1, %ecx
; CHECK: callq use_selector
; CHECK: .seh_handlerdata
; CHECK-NEXT: .long 2
; CHECK-NEXT: .long .Ltmp11@IMGREL
; CHECK-NEXT: .long .Ltmp12@IMGREL+1
; CHECK-NEXT: .long filt0@IMGREL
; CHECK-NEXT: .long .Ltmp16@IMGREL
; CHECK-NEXT: .long .Ltmp13@IMGREL
; CHECK-NEXT: .long .Ltmp14@IMGREL+1
; CHECK-NEXT: .long filt0@IMGREL
; CHECK-NEXT: .long .Ltmp16@IMGREL
; CHECK: .text
; CHECK: .seh_endproc
define void @two_invoke_nounwind_gap() {
entry:
invoke void @try_body()
to label %again unwind label %lpad
again:
call void @cannot_unwind()
invoke void @try_body()
to label %done unwind label %lpad
done:
ret void
lpad:
%vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
catch i8* bitcast (i32 (i8*, i8*)* @filt0 to i8*)
%sel = extractvalue { i8*, i32 } %vals, 1
call void @use_selector(i32 %sel)
ret void
}
; Normal path code
; CHECK-LABEL: {{^}}two_invoke_nounwind_gap:
; CHECK: .seh_proc two_invoke_nounwind_gap
; CHECK: .seh_handler __C_specific_handler, @unwind, @except
; CHECK: .Ltmp21:
; CHECK: callq try_body
; CHECK-NEXT: .Ltmp22:
; CHECK: callq cannot_unwind
; CHECK: .Ltmp23:
; CHECK: callq try_body
; CHECK-NEXT: .Ltmp24:
; CHECK: retq
; Landing pad code
; CHECK: .Ltmp26:
; CHECK: movl $1, %ecx
; CHECK: callq use_selector
; CHECK: .seh_handlerdata
; CHECK-NEXT: .long 1
; CHECK-NEXT: .long .Ltmp21@IMGREL
; CHECK-NEXT: .long .Ltmp24@IMGREL+1
; CHECK-NEXT: .long filt0@IMGREL
; CHECK-NEXT: .long .Ltmp26@IMGREL
; CHECK: .text
; CHECK: .seh_endproc
declare void @try_body()
declare void @do_nothing_on_unwind()
declare void @cannot_unwind() nounwind
declare void @use_selector(i32)
declare i32 @filt0(i8* %eh_info, i8* %rsp)
declare i32 @filt1(i8* %eh_info, i8* %rsp)
declare void @handler0()
declare void @handler1()
declare i32 @__C_specific_handler(...)
declare i32 @llvm.eh.typeid.for(i8*) readnone nounwind


@ -1,196 +0,0 @@
; RUN: llc -mtriple x86_64-pc-windows-msvc < %s | FileCheck %s
; This test case is also intended to be run manually as a complete functional
; test. It should link, print something, and exit zero rather than crashing.
; It is the hypothetical lowering of a C source program that looks like:
;
; int safe_div(int *n, int *d) {
; int r;
; __try {
; __try {
; r = *n / *d;
; } __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION) {
; puts("EXCEPTION_ACCESS_VIOLATION");
; r = -1;
; }
; } __except(GetExceptionCode() == EXCEPTION_INT_DIVIDE_BY_ZERO) {
; puts("EXCEPTION_INT_DIVIDE_BY_ZERO");
; r = -2;
; }
; return r;
; }
@str1 = internal constant [27 x i8] c"EXCEPTION_ACCESS_VIOLATION\00"
@str2 = internal constant [29 x i8] c"EXCEPTION_INT_DIVIDE_BY_ZERO\00"
define i32 @safe_div(i32* %n, i32* %d) {
entry:
%r = alloca i32, align 4
invoke void @try_body(i32* %r, i32* %n, i32* %d)
to label %__try.cont unwind label %lpad
lpad:
%vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
catch i8* bitcast (i32 (i8*, i8*)* @safe_div_filt0 to i8*)
catch i8* bitcast (i32 (i8*, i8*)* @safe_div_filt1 to i8*)
%ehptr = extractvalue { i8*, i32 } %vals, 0
%sel = extractvalue { i8*, i32 } %vals, 1
%filt0_val = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @safe_div_filt0 to i8*))
%is_filt0 = icmp eq i32 %sel, %filt0_val
br i1 %is_filt0, label %handler0, label %eh.dispatch1
eh.dispatch1:
%filt1_val = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @safe_div_filt1 to i8*))
%is_filt1 = icmp eq i32 %sel, %filt1_val
br i1 %is_filt1, label %handler1, label %eh.resume
handler0:
call void @puts(i8* getelementptr ([27 x i8]* @str1, i32 0, i32 0))
store i32 -1, i32* %r, align 4
br label %__try.cont
handler1:
call void @puts(i8* getelementptr ([29 x i8]* @str2, i32 0, i32 0))
store i32 -2, i32* %r, align 4
br label %__try.cont
eh.resume:
resume { i8*, i32 } %vals
__try.cont:
%safe_ret = load i32* %r, align 4
ret i32 %safe_ret
}
; Normal path code
; CHECK: {{^}}safe_div:
; CHECK: .seh_proc safe_div
; CHECK: .seh_handler __C_specific_handler, @unwind, @except
; CHECK: .Ltmp0:
; CHECK: leaq [[rloc:.*\(%rsp\)]], %rcx
; CHECK: callq try_body
; CHECK-NEXT: .Ltmp1
; CHECK: .LBB0_7:
; CHECK: movl [[rloc]], %eax
; CHECK: retq
; Landing pad code
; CHECK: .Ltmp3:
; CHECK: movl $1, %[[sel:[a-z]+]]
; CHECK: .Ltmp4
; CHECK: movl $2, %[[sel]]
; CHECK: .L{{.*}}:
; CHECK: cmpl $1, %[[sel]]
; CHECK: # %handler0
; CHECK: callq puts
; CHECK: movl $-1, [[rloc]]
; CHECK: jmp .LBB0_7
; CHECK: cmpl $2, %[[sel]]
; CHECK: # %handler1
; CHECK: callq puts
; CHECK: movl $-2, [[rloc]]
; CHECK: jmp .LBB0_7
; FIXME: EH preparation should not call _Unwind_Resume.
; CHECK: callq _Unwind_Resume
; CHECK: ud2
; CHECK: .seh_handlerdata
; CHECK: .long 2
; CHECK: .long .Ltmp0@IMGREL
; CHECK: .long .Ltmp1@IMGREL+1
; CHECK: .long safe_div_filt0@IMGREL
; CHECK: .long .Ltmp3@IMGREL
; CHECK: .long .Ltmp0@IMGREL
; CHECK: .long .Ltmp1@IMGREL+1
; CHECK: .long safe_div_filt1@IMGREL
; CHECK: .long .Ltmp4@IMGREL
; CHECK: .text
; CHECK: .seh_endproc
define void @try_body(i32* %r, i32* %n, i32* %d) {
entry:
%0 = load i32* %n, align 4
%1 = load i32* %d, align 4
%div = sdiv i32 %0, %1
store i32 %div, i32* %r, align 4
ret void
}
; The prototype of these filter functions is:
; int filter(EXCEPTION_POINTERS *eh_ptrs, void *rbp);
; The definition of EXCEPTION_POINTERS is:
; typedef struct _EXCEPTION_POINTERS {
; EXCEPTION_RECORD *ExceptionRecord;
; CONTEXT *ContextRecord;
; } EXCEPTION_POINTERS;
; The definition of EXCEPTION_RECORD is:
; typedef struct _EXCEPTION_RECORD {
; DWORD ExceptionCode;
; ...
; } EXCEPTION_RECORD;
; The exception code can be retrieved with two loads, one for the record
; pointer and one for the code. The values of local variables can be
; accessed via rbp, but that would require additional, not yet implemented
; LLVM support.
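; For illustration only (not part of the test), a rough C equivalent of the
; filter below, using the types sketched above, might read:
;
;   int safe_div_filt0(EXCEPTION_POINTERS *eh_ptrs, void *rbp) {
;     /* two loads: the ExceptionRecord pointer, then its ExceptionCode */
;     return eh_ptrs->ExceptionRecord->ExceptionCode == 0xC0000005;
;   }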
define i32 @safe_div_filt0(i8* %eh_ptrs, i8* %rbp) {
%eh_ptrs_c = bitcast i8* %eh_ptrs to i32**
%eh_rec = load i32** %eh_ptrs_c
%eh_code = load i32* %eh_rec
; EXCEPTION_ACCESS_VIOLATION = 0xC0000005
%cmp = icmp eq i32 %eh_code, 3221225477
%filt.res = zext i1 %cmp to i32
ret i32 %filt.res
}
define i32 @safe_div_filt1(i8* %eh_ptrs, i8* %rbp) {
%eh_ptrs_c = bitcast i8* %eh_ptrs to i32**
%eh_rec = load i32** %eh_ptrs_c
%eh_code = load i32* %eh_rec
; EXCEPTION_INT_DIVIDE_BY_ZERO = 0xC0000094
%cmp = icmp eq i32 %eh_code, 3221225620
%filt.res = zext i1 %cmp to i32
ret i32 %filt.res
}
@str_result = internal constant [21 x i8] c"safe_div result: %d\0A\00"
define i32 @main() {
%d.addr = alloca i32, align 4
%n.addr = alloca i32, align 4
store i32 10, i32* %n.addr, align 4
store i32 2, i32* %d.addr, align 4
%r1 = call i32 @safe_div(i32* %n.addr, i32* %d.addr)
call void (i8*, ...)* @printf(i8* getelementptr ([21 x i8]* @str_result, i32 0, i32 0), i32 %r1)
store i32 10, i32* %n.addr, align 4
store i32 0, i32* %d.addr, align 4
%r2 = call i32 @safe_div(i32* %n.addr, i32* %d.addr)
call void (i8*, ...)* @printf(i8* getelementptr ([21 x i8]* @str_result, i32 0, i32 0), i32 %r2)
%r3 = call i32 @safe_div(i32* %n.addr, i32* null)
call void (i8*, ...)* @printf(i8* getelementptr ([21 x i8]* @str_result, i32 0, i32 0), i32 %r3)
ret i32 0
}
define void @_Unwind_Resume() {
call void @abort()
unreachable
}
declare i32 @__C_specific_handler(...)
declare i32 @llvm.eh.typeid.for(i8*) readnone nounwind
declare void @puts(i8*)
declare void @printf(i8*, ...)
declare void @abort()

View File

@ -1,5 +1,4 @@
; RUN: llc -mcpu=yonah -mattr=vector-unaligned-mem -march=x86 < %s | FileCheck %s
; CHECK: addps (
; RUN: llc -mcpu=yonah -mattr=sse-unaligned-mem -march=x86 < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
@ -8,4 +7,7 @@ define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind {
%A = load <4 x float>* %P, align 4
%B = fadd <4 x float> %A, %In
ret <4 x float> %B
; CHECK-LABEL: @foo
; CHECK: addps (
}

View File

@ -6,7 +6,7 @@ define double @double() {
ret double 0x0000000000800000
}
; CHECK: .globl __real@0000000000800000
; CHECK-NEXT: .section .rdata,"rd",discard,__real@0000000000800000
; CHECK-NEXT: .section .rdata,"dr",discard,__real@0000000000800000
; CHECK-NEXT: .align 8
; CHECK-NEXT: __real@0000000000800000:
; CHECK-NEXT: .quad 8388608
@ -18,7 +18,7 @@ define <4 x i32> @vec1() {
ret <4 x i32> <i32 3, i32 2, i32 1, i32 0>
}
; CHECK: .globl __xmm@00000000000000010000000200000003
; CHECK-NEXT: .section .rdata,"rd",discard,__xmm@00000000000000010000000200000003
; CHECK-NEXT: .section .rdata,"dr",discard,__xmm@00000000000000010000000200000003
; CHECK-NEXT: .align 16
; CHECK-NEXT: __xmm@00000000000000010000000200000003:
; CHECK-NEXT: .long 3
@ -33,7 +33,7 @@ define <8 x i16> @vec2() {
ret <8 x i16> <i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
}
; CHECK: .globl __xmm@00000001000200030004000500060007
; CHECK-NEXT: .section .rdata,"rd",discard,__xmm@00000001000200030004000500060007
; CHECK-NEXT: .section .rdata,"dr",discard,__xmm@00000001000200030004000500060007
; CHECK-NEXT: .align 16
; CHECK-NEXT: __xmm@00000001000200030004000500060007:
; CHECK-NEXT: .short 7
@ -53,7 +53,7 @@ define <4 x float> @undef1() {
ret <4 x float> <float 1.0, float 1.0, float undef, float undef>
; CHECK: .globl __xmm@00000000000000003f8000003f800000
; CHECK-NEXT: .section .rdata,"rd",discard,__xmm@00000000000000003f8000003f800000
; CHECK-NEXT: .section .rdata,"dr",discard,__xmm@00000000000000003f8000003f800000
; CHECK-NEXT: .align 16
; CHECK-NEXT: __xmm@00000000000000003f8000003f800000:
; CHECK-NEXT: .long 1065353216 # float 1

View File

@ -22,7 +22,7 @@
; X86-NEXT: L{{.*}}:
; X86-NEXT: [[END_OF_F:^L.*]]:
;
; X86-LABEL: .section .debug$S,"rd"
; X86-LABEL: .section .debug$S,"dr"
; X86-NEXT: .long 4
; Symbol subsection
; X86-NEXT: .long 241
@ -127,7 +127,7 @@
; X64-NEXT: .L{{.*}}:
; X64-NEXT: [[END_OF_F:.*]]:
;
; X64-LABEL: .section .debug$S,"rd"
; X64-LABEL: .section .debug$S,"dr"
; X64-NEXT: .long 4
; Symbol subsection
; X64-NEXT: .long 241

View File

@ -29,7 +29,7 @@
; X86-NEXT: L{{.*}}:
; X86-NEXT: [[END_OF_F:.*]]:
;
; X86-LABEL: .section .debug$S,"rd"
; X86-LABEL: .section .debug$S,"dr"
; X86-NEXT: .long 4
; Symbol subsection
; X86-NEXT: .long 241
@ -159,7 +159,7 @@
; X64-NEXT: .L{{.*}}:
; X64-NEXT: [[END_OF_F:.*]]:
;
; X64-LABEL: .section .debug$S,"rd"
; X64-LABEL: .section .debug$S,"dr"
; X64-NEXT: .long 4
; Symbol subsection
; X64-NEXT: .long 241

View File

@ -53,7 +53,7 @@
; X86-NEXT: L{{.*}}:
; X86-NEXT: [[END_OF_F:.*]]:
;
; X86-LABEL: .section .debug$S,"rd"
; X86-LABEL: .section .debug$S,"dr"
; X86-NEXT: .long 4
; Symbol subsection for x
; X86-NEXT: .long 241
@ -317,7 +317,7 @@
; X64-NEXT: .L{{.*}}:
; X64-NEXT: [[END_OF_F:.*]]:
;
; X64-LABEL: .section .debug$S,"rd"
; X64-LABEL: .section .debug$S,"dr"
; X64-NEXT: .long 4
; Symbol subsection for x
; X64-NEXT: .long 241

View File

@ -20,7 +20,7 @@
; X86-NEXT: L{{.*}}:
; X86-NEXT: [[END_OF_F:.*]]:
;
; X86-LABEL: .section .debug$S,"rd"
; X86-LABEL: .section .debug$S,"dr"
; X86-NEXT: .long 4
; Symbol subsection
; X86-NEXT: .long 241
@ -118,7 +118,7 @@
; X64-NEXT: .L{{.*}}:
; X64-NEXT: [[END_OF_F:.*]]:
;
; X64-LABEL: .section .debug$S,"rd"
; X64-LABEL: .section .debug$S,"dr"
; X64-NEXT: .long 4
; Symbol subsection
; X64-NEXT: .long 241

View File

@ -22,7 +22,7 @@
; X86-NEXT: [[END_OF_BAR:^L.*]]:{{$}}
; X86-NOT: ret
; X86-LABEL: .section .debug$S,"rd"
; X86-LABEL: .section .debug$S,"dr"
; X86: .secrel32 "?bar@@YAXHZZ"
; X86-NEXT: .secidx "?bar@@YAXHZZ"
; X86: .long 0

View File

@ -6,7 +6,7 @@
; CHECK: .section .apple_types
; RUN: llc -mtriple=i686-pc-win32 -filetype=asm -O0 < %s | FileCheck -check-prefix=WIN32 %s
; WIN32: .section .debug$S,"rd"
; WIN32: .section .debug$S,"dr"
; RUN: llc -mtriple=i686-pc-win32 -filetype=null -O0 < %s

View File

@ -0,0 +1,33 @@
; RUN: not llvm-as -disable-output -verify-debug-info < %s 2>&1 | FileCheck %s
; ModuleID = 'test.c'
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
; Function Attrs: nounwind ssp uwtable
define i32 @foo() #0 {
entry:
ret i32 42, !dbg !13
}
attributes #0 = { nounwind ssp uwtable }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !10, !11}
!llvm.ident = !{!12}
!0 = !{!"0x11\0012\00clang version 3.7.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/test.c] [DW_LANG_C99]
!1 = !{!"test.c", !""}
!2 = !{}
!3 = !{!4}
!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\000\000\000\001", !1, !5, !6, null, i32 ()* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [/test.c]
!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!7 = !{!8}
!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
!9 = !{i32 2, !"Dwarf Version", i32 2}
!10 = !{i32 2, !"Debug Info Version", i32 2}
!11 = !{i32 1, !"PIC Level", i32 2}
!12 = !{!"clang version 3.7.0 "}
; An old-style MDLocation should not pass verify.
; CHECK: DISubprogram does not Verify
!13 = !{i32 2, i32 2, !4, null}

View File

@ -1,4 +1,6 @@
; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
; RUN: opt < %s -msan -msan-check-access-address=0 -msan-track-origins=1 -S | FileCheck %s
; RUN: opt < %s -msan -msan-check-access-address=0 -msan-track-origins=2 -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -0,0 +1,13 @@
; RUN: llvm-link -o - -S %s | FileCheck %s
; Crasher for PR22456: MapMetadata() should resolve all cycles.
; CHECK: !named = !{!0}
!named = !{!0}
; CHECK: !0 = distinct !{!1}
!0 = distinct !{!1}
; CHECK-NEXT: !1 = !{!2}
; CHECK-NEXT: !2 = !{!1}
!1 = !{!2}
!2 = !{!1}
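; !1 and !2 reference each other, so remapping the graph reachable from the
; distinct node !0 has to resolve the whole cycle before the nodes can be
; uniqued; presumably that is what the PR22456 crash reduced to.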

test/MC/ARM/pr22395-2.s Normal file
View File

@ -0,0 +1,37 @@
@ RUN: llvm-mc -triple armv4t-eabi -mattr +d16 -filetype asm -o - %s 2>&1 | FileCheck %s
.text
.thumb
.p2align 2
.fpu vfpv3
vldmia r0, {d16-d31}
@ CHECK: vldmia r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
@ CHECK-NOT: error: register expected
.fpu vfpv4
vldmia r0, {d16-d31}
@ CHECK: vldmia r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
@ CHECK-NOT: error: register expected
.fpu neon
vldmia r0, {d16-d31}
@ CHECK: vldmia r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
@ CHECK-NOT: error: register expected
.fpu neon-vfpv4
vldmia r0, {d16-d31}
@ CHECK: vldmia r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
@ CHECK-NOT: error: register expected
.fpu neon-fp-armv8
vldmia r0, {d16-d31}
@ CHECK: vldmia r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
@ CHECK-NOT: error: register expected
.fpu crypto-neon-fp-armv8
vldmia r0, {d16-d31}
@ CHECK: vldmia r0, {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
@ CHECK-NOT: error: register expected
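@ The initial -mattr=+d16 restricts the unit to d0-d15; every .fpu directive
@ above selects an FPU with 32 D registers. The point of the test (presumably
@ the PR22395 regression) is that the assembler must honor the feature change
@ from each .fpu directive, so d16-d31 keep parsing instead of triggering the
@ "register expected" diagnostic.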

View File

@ -6,4 +6,4 @@
; CHECK: .bss
@thingy_linkonce = linkonce_odr global %struct.foo zeroinitializer, align 4
; CHECK: .section .bss,"wb",discard,_thingy_linkonce
; CHECK: .section .bss,"bw",discard,_thingy_linkonce

View File

@ -5,7 +5,7 @@ define void @f() {
}
@ptr = constant void ()* @f, section ".CRT$XLB", align 8
; CHECK: .section .CRT$XLB,"rd"
; CHECK: .section .CRT$XLB,"dr"
@weak_array = weak_odr unnamed_addr constant [1 x i8*] [i8* bitcast (void ()* @f to i8*)]
; CHECK: .section .rdata,"rd",discard,weak_array
; CHECK: .section .rdata,"dr",discard,weak_array

View File

@ -1,5 +1,23 @@
// RUN: llvm-mc -filetype=obj -triple i686-pc-mingw32 %s | llvm-readobj -s -sr -sd | FileCheck %s
.section baz, "xr"
.def X
.scl 2;
.type 32;
.endef
.globl X
X:
mov Y-X+42, %eax
retl
.def Y
.scl 2;
.type 32;
.endef
.globl Y
Y:
retl
.def _foobar;
.scl 2;
.type 32;
@ -30,3 +48,10 @@ _rust_crate:
// CHECK: SectionData (
// CHECK-NEXT: 0000: 00000000 00000000 1C000000 20000000
// CHECK-NEXT: )
// CHECK: Name: baz
// CHECK: Relocations [
// CHECK-NEXT: ]
// CHECK: SectionData (
// CHECK-NEXT: 0000: A1300000 00C3C3
// CHECK-NEXT: )

View File

@ -49,17 +49,17 @@ define i32 @main() nounwind {
ret i32 0
}
; WIN32: .section .CRT$XCU,"rd"
; WIN32: .section .CRT$XCU,"dr"
; WIN32: a_global_ctor
; WIN32: .section .CRT$XCU,"rd",associative,{{_?}}b
; WIN32: .section .CRT$XCU,"dr",associative,{{_?}}b
; WIN32: b_global_ctor
; WIN32-NOT: c_global_ctor
; WIN32: .section .CRT$XTX,"rd"
; WIN32: .section .CRT$XTX,"dr"
; WIN32: a_global_dtor
; MINGW32: .section .ctors,"wd"
; MINGW32: .section .ctors,"dw"
; MINGW32: a_global_ctor
; MINGW32: .section .ctors,"wd",associative,{{_?}}b
; MINGW32: .section .ctors,"dw",associative,{{_?}}b
; MINGW32: b_global_ctor
; MINGW32-NOT: c_global_ctor
; MINGW32: .section .dtors,"wd"
; MINGW32: .section .dtors,"dw"
; MINGW32: a_global_dtor

View File

@ -3,5 +3,5 @@
@data = dllexport constant [5 x i8] c"data\00", align 1
; CHECK: .section .rdata,"rd"
; CHECK: .section .rdata,"dr"

View File

@ -3,5 +3,5 @@
// CHECK: .section .klaatu,"wn"
.section .barada,"y"
// CHECK: .section .barada,"y"
.section .nikto,"wds"
// CHECK: .section .nikto,"wds"
.section .nikto,"dws"
// CHECK: .section .nikto,"dws"

View File

@ -28,20 +28,20 @@ define weak void @f() section ".sect" {
}
; Weak global
; X86: .section .data,"rd",discard,_a
; X86: .section .data,"dr",discard,_a
; X86: .globl _a
; X86: .zero 12
;
; X64: .section .data,"rd",discard,a
; X64: .section .data,"dr",discard,a
; X64: .globl a
; X64: .zero 12
@a = weak unnamed_addr constant { i32, i32, i32 } { i32 0, i32 0, i32 0}, section ".data"
; X86: .section .tls$,"wd",discard,_b
; X86: .section .tls$,"dw",discard,_b
; X86: .globl _b
; X86: .long 0
;
; X64: .section .tls$,"wd",discard,b
; X64: .section .tls$,"dw",discard,b
; X64: .globl b
; X64: .long 0

View File

@ -192,4 +192,13 @@ define void @test11(i32 *%P) {
; CHECK-NEXT: ret void
}
; CHECK-LABEL: @test12(
define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
%load0 = load i32* %P1
%1 = load atomic i32* %P2 seq_cst, align 4
%load1 = load i32* %P1
%sel = select i1 %B, i32 %load0, i32 %load1
ret i32 %sel
; CHECK: load i32* %P1
; CHECK: load i32* %P1
}

View File

@ -0,0 +1,19 @@
; RUN: opt < %s -inline -disable-output 2>/dev/null
; This test used to trigger an assertion in the assumption cache when
; inlining the indirect call.
declare void @llvm.assume(i1)
define void @foo() {
ret void
}
define void @bar(void ()*) {
call void @llvm.assume(i1 true)
call void %0();
ret void
}
define void @baz() {
call void @bar(void ()* @foo)
ret void
}

View File

@ -1,5 +1,7 @@
; RUN: opt -instcombine -S < %s | FileCheck %s
target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
define i32 @test_load_cast_combine_tbaa(float* %ptr) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA.
; CHECK-LABEL: @test_load_cast_combine_tbaa(

View File

@ -201,7 +201,7 @@ for.end: ; preds = %for.body
;
; Currently we have three extra add.w's that keep the store address
; live past the next increment because ISEL is unfortunately undoing
; the store chain. ISEL also fails to convert all but one of the stores to
; the store chain. ISEL also fails to convert the stores to
; post-increment addressing. However, the loads should use
; post-increment addressing, no add's or add.w's beyond the three
; mentioned. Most importantly, there should be no spills or reloads!
@ -210,7 +210,7 @@ for.end: ; preds = %for.body
; A9: %.lr.ph
; A9-NOT: lsl.w
; A9-NOT: {{ldr|str|adds|add r}}
; A9: vst1.8 {{.*}} [r{{[0-9]+}}]!
; A9: add.w r
; A9-NOT: {{ldr|str|adds|add r}}
; A9: add.w r
; A9-NOT: {{ldr|str|adds|add r}}

View File

@ -0,0 +1,22 @@
; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
%T = type { i64, i64 }
define void @test(i8* %src) {
%tmp = alloca i8
%dst = alloca i8
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 1, i32 8, i1 false)
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %src, i64 1, i32 8, i1 false), !noalias !2
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %tmp, i64 1, i32 8, i1 false)
ret void
}
declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
; Check that the noalias scope for "dst" was dropped by verifying that the metadata string is gone
; CHECK-NOT: "dst"
!0 = !{!0}
!1 = distinct !{!1, !0, !"dst"}
!2 = distinct !{!1}

View File

@ -0,0 +1,50 @@
; RUN: opt < %s -basicaa -slp-vectorizer -S -mcpu=corei7-avx | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @test1(x86_mmx %a, x86_mmx %b, i64* %ptr) {
; Ensure we can handle x86_mmx values, which are primitive and can be bitcast
; to and from integer types but can't be put into a vector.
;
; CHECK-LABEL: @test1
; CHECK: store i64
; CHECK: store i64
; CHECK: ret void
entry:
%a.cast = bitcast x86_mmx %a to i64
%b.cast = bitcast x86_mmx %b to i64
%a.and = and i64 %a.cast, 42
%b.and = and i64 %b.cast, 42
%gep = getelementptr i64* %ptr, i32 1
store i64 %a.and, i64* %ptr
store i64 %b.and, i64* %gep
ret void
}
define void @test2(x86_mmx %a, x86_mmx %b) {
; Same as @test1 but using phi-input vectorization instead of store
; vectorization.
;
; CHECK-LABEL: @test2
; CHECK: and i64
; CHECK: and i64
; CHECK: ret void
entry:
br i1 undef, label %if.then, label %exit
if.then:
%a.cast = bitcast x86_mmx %a to i64
%b.cast = bitcast x86_mmx %b to i64
%a.and = and i64 %a.cast, 42
%b.and = and i64 %b.cast, 42
br label %exit
exit:
%a.phi = phi i64 [ 0, %entry ], [ %a.and, %if.then ]
%b.phi = phi i64 [ 0, %entry ], [ %b.and, %if.then ]
tail call void @f(i64 %a.phi, i64 %b.phi)
ret void
}
declare void @f(i64, i64)

View File

@ -0,0 +1,24 @@
; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @test(i8* noalias dereferenceable(1) %in, i8* noalias dereferenceable(1) %out) {
%tmp = alloca i8
%tmp2 = alloca i8
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 1, i32 8, i1 false)
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %in, i64 1, i32 8, i1 false), !alias.scope !4
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* %tmp, i64 1, i32 8, i1 false), !alias.scope !5
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %tmp2, i64 1, i32 8, i1 false), !noalias !6
ret void
}
declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
!0 = !{!0}
!1 = distinct !{!1, !0, !"in"}
!2 = distinct !{!2, !0, !"tmp"}
!3 = distinct !{!3, !0, !"tmp2"}
!4 = distinct !{!1, !2}
!5 = distinct !{!2, !3}
!6 = distinct !{!1, !2}

View File

@ -0,0 +1,9 @@
; RUN: llvm-as -o %t.bc %s
; RUN: ld -plugin %llvmshlibdir/LLVMgold.so -plugin-opt=emit-llvm \
; RUN: --no-map-whole-files -r -o %t2.bc %t.bc
; RUN: llvm-dis < %t2.bc -o - | FileCheck %s
; CHECK: main
define i32 @main() {
ret i32 0
}

View File

@ -559,11 +559,9 @@ static void freeSymName(ld_plugin_symbol &Sym) {
}
static std::unique_ptr<Module>
getModuleForFile(LLVMContext &Context, claimed_file &F, raw_fd_ostream *ApiFile,
getModuleForFile(LLVMContext &Context, claimed_file &F,
off_t Filesize, raw_fd_ostream *ApiFile,
StringSet<> &Internalize, StringSet<> &Maybe) {
ld_plugin_input_file File;
if (get_input_file(F.handle, &File) != LDPS_OK)
message(LDPL_FATAL, "Failed to get file information");
if (get_symbols(F.handle, F.syms.size(), &F.syms[0]) != LDPS_OK)
message(LDPL_FATAL, "Failed to get symbol information");
@ -572,7 +570,7 @@ getModuleForFile(LLVMContext &Context, claimed_file &F, raw_fd_ostream *ApiFile,
if (get_view(F.handle, &View) != LDPS_OK)
message(LDPL_FATAL, "Failed to get a view of file");
MemoryBufferRef BufferRef(StringRef((const char *)View, File.filesize), "");
MemoryBufferRef BufferRef(StringRef((const char *)View, Filesize), "");
ErrorOr<std::unique_ptr<object::IRObjectFile>> ObjOrErr =
object::IRObjectFile::create(BufferRef, Context);
@ -580,9 +578,6 @@ getModuleForFile(LLVMContext &Context, claimed_file &F, raw_fd_ostream *ApiFile,
message(LDPL_FATAL, "Could not read bitcode from file : %s",
EC.message().c_str());
if (release_input_file(F.handle) != LDPS_OK)
message(LDPL_FATAL, "Failed to release file information");
object::IRObjectFile &Obj = **ObjOrErr;
Module &M = Obj.getModule();
@ -798,8 +793,12 @@ static ld_plugin_status allSymbolsReadHook(raw_fd_ostream *ApiFile) {
StringSet<> Internalize;
StringSet<> Maybe;
for (claimed_file &F : Modules) {
ld_plugin_input_file File;
if (get_input_file(F.handle, &File) != LDPS_OK)
message(LDPL_FATAL, "Failed to get file information");
std::unique_ptr<Module> M =
getModuleForFile(Context, F, ApiFile, Internalize, Maybe);
getModuleForFile(Context, F, File.filesize, ApiFile,
Internalize, Maybe);
if (!options::triple.empty())
M->setTargetTriple(options::triple.c_str());
else if (M->getTargetTriple().empty()) {
@ -808,6 +807,8 @@ static ld_plugin_status allSymbolsReadHook(raw_fd_ostream *ApiFile) {
if (L.linkInModule(M.get()))
message(LDPL_FATAL, "Failed to link module");
if (release_input_file(F.handle) != LDPS_OK)
message(LDPL_FATAL, "Failed to release file information");
}
for (const auto &Name : Internalize) {