Vendor import of llvm trunk r321530:
https://llvm.org/svn/llvm-project/llvm/trunk@321530
This commit is contained in:
parent
90a93bbe93
commit
479b811dec
@ -142,7 +142,7 @@ func TestSubtypes(t *testing.T) {
|
||||
int_pointer := PointerType(cont.Int32Type(), 0)
|
||||
int_inner := int_pointer.Subtypes()
|
||||
if len(int_inner) != 1 {
|
||||
t.Errorf("Got size %d, though wanted 1")
|
||||
t.Errorf("Got size %d, though wanted 1", len(int_inner))
|
||||
}
|
||||
if int_inner[0] != cont.Int32Type() {
|
||||
t.Errorf("Expected int32 type")
|
||||
@ -151,7 +151,7 @@ func TestSubtypes(t *testing.T) {
|
||||
st_pointer := cont.StructType([]Type{cont.Int32Type(), cont.Int8Type()}, false)
|
||||
st_inner := st_pointer.Subtypes()
|
||||
if len(st_inner) != 2 {
|
||||
t.Errorf("Got size %d, though wanted 2")
|
||||
t.Errorf("Got size %d, though wanted 2", len(int_inner))
|
||||
}
|
||||
if st_inner[0] != cont.Int32Type() {
|
||||
t.Errorf("Expected first struct field to be int32")
|
||||
|
@ -17,7 +17,7 @@ include(HandleLLVMStdlib)
|
||||
|
||||
if( UNIX AND NOT (BEOS OR HAIKU) )
|
||||
# Used by check_symbol_exists:
|
||||
set(CMAKE_REQUIRED_LIBRARIES m)
|
||||
list(APPEND CMAKE_REQUIRED_LIBRARIES "m")
|
||||
endif()
|
||||
# x86_64 FreeBSD 9.2 requires libcxxrt to be specified explicitly.
|
||||
if( CMAKE_SYSTEM MATCHES "FreeBSD-9.2-RELEASE" AND
|
||||
|
@ -8,7 +8,7 @@ INCLUDE(CheckLibraryExists)
|
||||
|
||||
function(check_working_cxx_atomics varname)
|
||||
set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
|
||||
set(CMAKE_REQUIRED_FLAGS "-std=c++11")
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++11")
|
||||
CHECK_CXX_SOURCE_COMPILES("
|
||||
#include <atomic>
|
||||
std::atomic<int> x;
|
||||
|
@ -28,7 +28,7 @@ if(NOT DEFINED LLVM_COMPILER_CHECKED)
|
||||
# bug in libstdc++4.6 that is fixed in libstdc++4.7.
|
||||
set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
|
||||
set(OLD_CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES})
|
||||
set(CMAKE_REQUIRED_FLAGS "-std=c++0x")
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++0x")
|
||||
check_cxx_source_compiles("
|
||||
#include <atomic>
|
||||
std::atomic<float> x(0.0f);
|
||||
|
@ -1040,7 +1040,7 @@ line argument:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
DEBUG(errs() << "I am here!\n");
|
||||
DEBUG(dbgs() << "I am here!\n");
|
||||
|
||||
Then you can run your pass like this:
|
||||
|
||||
@ -1076,10 +1076,10 @@ follows:
|
||||
.. code-block:: c++
|
||||
|
||||
#define DEBUG_TYPE "foo"
|
||||
DEBUG(errs() << "'foo' debug type\n");
|
||||
DEBUG(dbgs() << "'foo' debug type\n");
|
||||
#undef DEBUG_TYPE
|
||||
#define DEBUG_TYPE "bar"
|
||||
DEBUG(errs() << "'bar' debug type\n"));
|
||||
DEBUG(dbgs() << "'bar' debug type\n");
|
||||
#undef DEBUG_TYPE
|
||||
|
||||
Then you can run your pass like this:
|
||||
@ -1120,8 +1120,8 @@ preceding example could be written as:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
DEBUG_WITH_TYPE("foo", errs() << "'foo' debug type\n");
|
||||
DEBUG_WITH_TYPE("bar", errs() << "'bar' debug type\n"));
|
||||
DEBUG_WITH_TYPE("foo", dbgs() << "'foo' debug type\n");
|
||||
DEBUG_WITH_TYPE("bar", dbgs() << "'bar' debug type\n");
|
||||
|
||||
.. _Statistic:
|
||||
|
||||
|
@ -197,6 +197,9 @@ Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
|
||||
Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
|
||||
FastMathFlags FMF, const SimplifyQuery &Q);
|
||||
|
||||
/// Given a callsite, fold the result or return null.
|
||||
Value *SimplifyCall(ImmutableCallSite CS, const SimplifyQuery &Q);
|
||||
|
||||
/// Given a function and iterators over arguments, fold the result or return
|
||||
/// null.
|
||||
Value *SimplifyCall(ImmutableCallSite CS, Value *V, User::op_iterator ArgBegin,
|
||||
|
@ -407,12 +407,6 @@ class MemoryDependenceResults {
|
||||
void getNonLocalPointerDependency(Instruction *QueryInst,
|
||||
SmallVectorImpl<NonLocalDepResult> &Result);
|
||||
|
||||
/// Perform a dependency query specifically for QueryInst's access to Loc.
|
||||
/// The other comments for getNonLocalPointerDependency apply here as well.
|
||||
void getNonLocalPointerDependencyFrom(Instruction *QueryInst,
|
||||
const MemoryLocation &Loc, bool isLoad,
|
||||
SmallVectorImpl<NonLocalDepResult> &Result);
|
||||
|
||||
/// Removes an instruction from the dependence analysis, updating the
|
||||
/// dependence of instructions that previously depended on it.
|
||||
void removeInstruction(Instruction *InstToRemove);
|
||||
|
@ -646,9 +646,6 @@ class TargetTransformInfo {
|
||||
/// \brief Additional properties of an operand's values.
|
||||
enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
|
||||
|
||||
/// \return True if target can execute instructions out of order.
|
||||
bool isOutOfOrder() const;
|
||||
|
||||
/// \return The number of scalar or vector registers that the target has.
|
||||
/// If 'Vectors' is true, it returns the number of vector registers. If it is
|
||||
/// set to false, it returns the number of scalar registers.
|
||||
@ -1021,7 +1018,6 @@ class TargetTransformInfo::Concept {
|
||||
Type *Ty) = 0;
|
||||
virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
|
||||
Type *Ty) = 0;
|
||||
virtual bool isOutOfOrder() const = 0;
|
||||
virtual unsigned getNumberOfRegisters(bool Vector) = 0;
|
||||
virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
|
||||
virtual unsigned getMinVectorRegisterBitWidth() = 0;
|
||||
@ -1299,9 +1295,6 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
|
||||
Type *Ty) override {
|
||||
return Impl.getIntImmCost(IID, Idx, Imm, Ty);
|
||||
}
|
||||
bool isOutOfOrder() const override {
|
||||
return Impl.isOutOfOrder();
|
||||
}
|
||||
unsigned getNumberOfRegisters(bool Vector) override {
|
||||
return Impl.getNumberOfRegisters(Vector);
|
||||
}
|
||||
|
@ -337,8 +337,6 @@ class TargetTransformInfoImplBase {
|
||||
return TTI::TCC_Free;
|
||||
}
|
||||
|
||||
bool isOutOfOrder() const { return false; }
|
||||
|
||||
unsigned getNumberOfRegisters(bool Vector) { return 8; }
|
||||
|
||||
unsigned getRegisterBitWidth(bool Vector) const { return 32; }
|
||||
|
@ -95,7 +95,7 @@ enum MachineTypes : unsigned {
|
||||
MT_Invalid = 0xffff,
|
||||
|
||||
IMAGE_FILE_MACHINE_UNKNOWN = 0x0,
|
||||
IMAGE_FILE_MACHINE_AM33 = 0x13,
|
||||
IMAGE_FILE_MACHINE_AM33 = 0x1D3,
|
||||
IMAGE_FILE_MACHINE_AMD64 = 0x8664,
|
||||
IMAGE_FILE_MACHINE_ARM = 0x1C0,
|
||||
IMAGE_FILE_MACHINE_ARMNT = 0x1C4,
|
||||
|
@ -402,10 +402,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
|
||||
return BaseT::getInstructionLatency(I);
|
||||
}
|
||||
|
||||
bool isOutOfOrder() const {
|
||||
return getST()->getSchedModel().isOutOfOrder();
|
||||
}
|
||||
|
||||
/// @}
|
||||
|
||||
/// \name Vector TTI Implementations
|
||||
|
@ -288,7 +288,8 @@ class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
|
||||
llvm_i32_ty, // ordering
|
||||
llvm_i32_ty, // scope
|
||||
llvm_i1_ty], // isVolatile
|
||||
[IntrArgMemOnly, NoCapture<0>]
|
||||
[IntrArgMemOnly, NoCapture<0>], "",
|
||||
[SDNPMemOperand]
|
||||
>;
|
||||
|
||||
def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
|
||||
|
@ -226,8 +226,8 @@ class FunctionSamples {
|
||||
|
||||
sampleprof_error addCalledTargetSamples(uint32_t LineOffset,
|
||||
uint32_t Discriminator,
|
||||
const std::string &FName,
|
||||
uint64_t Num, uint64_t Weight = 1) {
|
||||
StringRef FName, uint64_t Num,
|
||||
uint64_t Weight = 1) {
|
||||
return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget(
|
||||
FName, Num, Weight);
|
||||
}
|
||||
|
@ -385,8 +385,8 @@ bool DemandedBits::isInstructionDead(Instruction *I) {
|
||||
void DemandedBits::print(raw_ostream &OS) {
|
||||
performAnalysis();
|
||||
for (auto &KV : AliveBits) {
|
||||
OS << "DemandedBits: 0x" << utohexstr(KV.second.getLimitedValue()) << " for "
|
||||
<< *KV.first << "\n";
|
||||
OS << "DemandedBits: 0x" << Twine::utohexstr(KV.second.getLimitedValue())
|
||||
<< " for " << *KV.first << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3897,8 +3897,9 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQ
|
||||
// If extracting a specified index from the vector, see if we can recursively
|
||||
// find a previously computed scalar that was inserted into the vector.
|
||||
if (auto *IdxC = dyn_cast<ConstantInt>(Idx))
|
||||
if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
|
||||
return Elt;
|
||||
if (IdxC->getValue().ule(Vec->getType()->getVectorNumElements()))
|
||||
if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
|
||||
return Elt;
|
||||
|
||||
// An undef extract index can be arbitrarily chosen to be an out-of-range
|
||||
// index value, which would result in the instruction being undef.
|
||||
@ -4494,6 +4495,22 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
|
||||
return *ArgBegin;
|
||||
return nullptr;
|
||||
}
|
||||
case Intrinsic::bswap: {
|
||||
Value *IIOperand = *ArgBegin;
|
||||
Value *X = nullptr;
|
||||
// bswap(bswap(x)) -> x
|
||||
if (match(IIOperand, m_BSwap(m_Value(X))))
|
||||
return X;
|
||||
return nullptr;
|
||||
}
|
||||
case Intrinsic::bitreverse: {
|
||||
Value *IIOperand = *ArgBegin;
|
||||
Value *X = nullptr;
|
||||
// bitreverse(bitreverse(x)) -> x
|
||||
if (match(IIOperand, m_BitReverse(m_Value(X))))
|
||||
return X;
|
||||
return nullptr;
|
||||
}
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
@ -4548,6 +4565,16 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
|
||||
return SimplifyRelativeLoad(C0, C1, Q.DL);
|
||||
return nullptr;
|
||||
}
|
||||
case Intrinsic::powi:
|
||||
if (ConstantInt *Power = dyn_cast<ConstantInt>(RHS)) {
|
||||
// powi(x, 0) -> 1.0
|
||||
if (Power->isZero())
|
||||
return ConstantFP::get(LHS->getType(), 1.0);
|
||||
// powi(x, 1) -> x
|
||||
if (Power->isOne())
|
||||
return LHS;
|
||||
}
|
||||
return nullptr;
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
@ -4616,6 +4643,12 @@ Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V,
|
||||
return ::SimplifyCall(CS, V, Args.begin(), Args.end(), Q, RecursionLimit);
|
||||
}
|
||||
|
||||
Value *llvm::SimplifyCall(ImmutableCallSite ICS, const SimplifyQuery &Q) {
|
||||
CallSite CS(const_cast<Instruction*>(ICS.getInstruction()));
|
||||
return ::SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
|
||||
Q, RecursionLimit);
|
||||
}
|
||||
|
||||
/// See if we can compute a simplified version of this instruction.
|
||||
/// If not, this returns null.
|
||||
|
||||
@ -4750,8 +4783,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
|
||||
break;
|
||||
case Instruction::Call: {
|
||||
CallSite CS(cast<CallInst>(I));
|
||||
Result = SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
|
||||
Q);
|
||||
Result = SimplifyCall(CS, Q);
|
||||
break;
|
||||
}
|
||||
#define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
|
||||
|
@ -265,13 +265,21 @@ void Lint::visitCallSite(CallSite CS) {
|
||||
// Check that noalias arguments don't alias other arguments. This is
|
||||
// not fully precise because we don't know the sizes of the dereferenced
|
||||
// memory regions.
|
||||
if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
|
||||
for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
|
||||
if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) {
|
||||
AttributeList PAL = CS.getAttributes();
|
||||
unsigned ArgNo = 0;
|
||||
for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) {
|
||||
// Skip ByVal arguments since they will be memcpy'd to the callee's
|
||||
// stack so we're not really passing the pointer anyway.
|
||||
if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal))
|
||||
continue;
|
||||
if (AI != BI && (*BI)->getType()->isPointerTy()) {
|
||||
AliasResult Result = AA->alias(*AI, *BI);
|
||||
Assert(Result != MustAlias && Result != PartialAlias,
|
||||
"Unusual: noalias argument aliases another argument", &I);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check that an sret argument points to valid memory.
|
||||
if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
|
||||
|
@ -920,14 +920,6 @@ void MemoryDependenceResults::getNonLocalPointerDependency(
|
||||
Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
|
||||
const MemoryLocation Loc = MemoryLocation::get(QueryInst);
|
||||
bool isLoad = isa<LoadInst>(QueryInst);
|
||||
return getNonLocalPointerDependencyFrom(QueryInst, Loc, isLoad, Result);
|
||||
}
|
||||
|
||||
void MemoryDependenceResults::getNonLocalPointerDependencyFrom(
|
||||
Instruction *QueryInst,
|
||||
const MemoryLocation &Loc,
|
||||
bool isLoad,
|
||||
SmallVectorImpl<NonLocalDepResult> &Result) {
|
||||
BasicBlock *FromBB = QueryInst->getParent();
|
||||
assert(FromBB);
|
||||
|
||||
@ -1127,15 +1119,21 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
|
||||
// If we already have a cache entry for this CacheKey, we may need to do some
|
||||
// work to reconcile the cache entry and the current query.
|
||||
if (!Pair.second) {
|
||||
if (CacheInfo->Size != Loc.Size) {
|
||||
// The query's Size differs from the cached one. Throw out the
|
||||
// cached data and proceed with the query at the new size.
|
||||
if (CacheInfo->Size < Loc.Size) {
|
||||
// The query's Size is greater than the cached one. Throw out the
|
||||
// cached data and proceed with the query at the greater size.
|
||||
CacheInfo->Pair = BBSkipFirstBlockPair();
|
||||
CacheInfo->Size = Loc.Size;
|
||||
for (auto &Entry : CacheInfo->NonLocalDeps)
|
||||
if (Instruction *Inst = Entry.getResult().getInst())
|
||||
RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
|
||||
CacheInfo->NonLocalDeps.clear();
|
||||
} else if (CacheInfo->Size > Loc.Size) {
|
||||
// This query's Size is less than the cached one. Conservatively restart
|
||||
// the query using the greater size.
|
||||
return getNonLocalPointerDepFromBB(
|
||||
QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
|
||||
StartBB, Result, Visited, SkipFirstBlock);
|
||||
}
|
||||
|
||||
// If the query's AATags are inconsistent with the cached one,
|
||||
|
@ -306,7 +306,9 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
|
||||
NonRenamableLocal || HasInlineAsmMaybeReferencingInternal ||
|
||||
// Inliner doesn't handle variadic functions.
|
||||
// FIXME: refactor this to use the same code that inliner is using.
|
||||
F.isVarArg();
|
||||
F.isVarArg() ||
|
||||
// Don't try to import functions with noinline attribute.
|
||||
F.getAttributes().hasFnAttribute(Attribute::NoInline);
|
||||
GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
|
||||
/* Live = */ false, F.isDSOLocal());
|
||||
FunctionSummary::FFlags FunFlags{
|
||||
|
@ -1268,7 +1268,11 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
|
||||
}
|
||||
if (!hasTrunc)
|
||||
return getAddExpr(Operands);
|
||||
UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
|
||||
// In spite we checked in the beginning that ID is not in the cache,
|
||||
// it is possible that during recursion and different modification
|
||||
// ID came to cache, so if we found it, just return it.
|
||||
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
|
||||
return S;
|
||||
}
|
||||
|
||||
// trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
|
||||
@ -1284,7 +1288,11 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
|
||||
}
|
||||
if (!hasTrunc)
|
||||
return getMulExpr(Operands);
|
||||
UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL.
|
||||
// In spite we checked in the beginning that ID is not in the cache,
|
||||
// it is possible that during recursion and different modification
|
||||
// ID came to cache, so if we found it, just return it.
|
||||
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
|
||||
return S;
|
||||
}
|
||||
|
||||
// If the input value is a chrec scev, truncate the chrec's operands.
|
||||
|
@ -187,8 +187,21 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
|
||||
// generated code.
|
||||
if (isa<DbgInfoIntrinsic>(IP))
|
||||
ScanLimit++;
|
||||
|
||||
// Conservatively, do not use any instruction which has any of wrap/exact
|
||||
// flags installed.
|
||||
// TODO: Instead of simply disable poison instructions we can be clever
|
||||
// here and match SCEV to this instruction.
|
||||
auto canGeneratePoison = [](Instruction *I) {
|
||||
if (isa<OverflowingBinaryOperator>(I) &&
|
||||
(I->hasNoSignedWrap() || I->hasNoUnsignedWrap()))
|
||||
return true;
|
||||
if (isa<PossiblyExactOperator>(I) && I->isExact())
|
||||
return true;
|
||||
return false;
|
||||
};
|
||||
if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
|
||||
IP->getOperand(1) == RHS)
|
||||
IP->getOperand(1) == RHS && !canGeneratePoison(&*IP))
|
||||
return &*IP;
|
||||
if (IP == BlockBegin) break;
|
||||
}
|
||||
|
@ -314,10 +314,6 @@ int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
|
||||
return Cost;
|
||||
}
|
||||
|
||||
bool TargetTransformInfo::isOutOfOrder() const {
|
||||
return TTIImpl->isOutOfOrder();
|
||||
}
|
||||
|
||||
unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
|
||||
return TTIImpl->getNumberOfRegisters(Vector);
|
||||
}
|
||||
|
@ -4238,14 +4238,14 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
|
||||
LHS = CmpLHS;
|
||||
RHS = CmpRHS;
|
||||
|
||||
// If the predicate is an "or-equal" (FP) predicate, then signed zeroes may
|
||||
// return inconsistent results between implementations.
|
||||
// (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
|
||||
// minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
|
||||
// Therefore we behave conservatively and only proceed if at least one of the
|
||||
// operands is known to not be zero, or if we don't care about signed zeroes.
|
||||
// Signed zero may return inconsistent results between implementations.
|
||||
// (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
|
||||
// minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
|
||||
// Therefore, we behave conservatively and only proceed if at least one of the
|
||||
// operands is known to not be zero or if we don't care about signed zero.
|
||||
switch (Pred) {
|
||||
default: break;
|
||||
// FIXME: Include OGT/OLT/UGT/ULT.
|
||||
case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
|
||||
case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
|
||||
if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
|
||||
@ -4493,14 +4493,24 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
|
||||
|
||||
// Deal with type mismatches.
|
||||
if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
|
||||
if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
|
||||
if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) {
|
||||
// If this is a potential fmin/fmax with a cast to integer, then ignore
|
||||
// -0.0 because there is no corresponding integer value.
|
||||
if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
|
||||
FMF.setNoSignedZeros();
|
||||
return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
|
||||
cast<CastInst>(TrueVal)->getOperand(0), C,
|
||||
LHS, RHS);
|
||||
if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
|
||||
}
|
||||
if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) {
|
||||
// If this is a potential fmin/fmax with a cast to integer, then ignore
|
||||
// -0.0 because there is no corresponding integer value.
|
||||
if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
|
||||
FMF.setNoSignedZeros();
|
||||
return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
|
||||
C, cast<CastInst>(FalseVal)->getOperand(0),
|
||||
LHS, RHS);
|
||||
}
|
||||
}
|
||||
return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
|
||||
LHS, RHS);
|
||||
|
@ -1922,14 +1922,16 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
|
||||
EVT VT = Sel.getValueType();
|
||||
SDLoc DL(Sel);
|
||||
SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
|
||||
assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
|
||||
isConstantFPBuildVectorOrConstantFP(NewCT)) &&
|
||||
"Failed to constant fold a binop with constant operands");
|
||||
if (!NewCT.isUndef() &&
|
||||
!isConstantOrConstantVector(NewCT, true) &&
|
||||
!isConstantFPBuildVectorOrConstantFP(NewCT))
|
||||
return SDValue();
|
||||
|
||||
SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
|
||||
assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
|
||||
isConstantFPBuildVectorOrConstantFP(NewCF)) &&
|
||||
"Failed to constant fold a binop with constant operands");
|
||||
if (!NewCF.isUndef() &&
|
||||
!isConstantOrConstantVector(NewCF, true) &&
|
||||
!isConstantFPBuildVectorOrConstantFP(NewCF))
|
||||
return SDValue();
|
||||
|
||||
return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
|
||||
}
|
||||
@ -3577,7 +3579,8 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
|
||||
|
||||
// TODO: What is the 'or' equivalent of this fold?
|
||||
// (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
|
||||
if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
|
||||
if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
|
||||
IsInteger && CC0 == ISD::SETNE &&
|
||||
((isNullConstant(LR) && isAllOnesConstant(RR)) ||
|
||||
(isAllOnesConstant(LR) && isNullConstant(RR)))) {
|
||||
SDValue One = DAG.getConstant(1, DL, OpVT);
|
||||
@ -3641,15 +3644,18 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
|
||||
if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
|
||||
VT.getSizeInBits() <= 64) {
|
||||
if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
|
||||
APInt ADDC = ADDI->getAPIntValue();
|
||||
if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
|
||||
if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
|
||||
// Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
|
||||
// immediate for an add, but it is legal if its top c2 bits are set,
|
||||
// transform the ADD so the immediate doesn't need to be materialized
|
||||
// in a register.
|
||||
if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
|
||||
APInt ADDC = ADDI->getAPIntValue();
|
||||
APInt SRLC = SRLI->getAPIntValue();
|
||||
if (ADDC.getMinSignedBits() <= 64 &&
|
||||
SRLC.ult(VT.getSizeInBits()) &&
|
||||
!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
|
||||
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
|
||||
SRLI->getZExtValue());
|
||||
SRLC.getZExtValue());
|
||||
if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
|
||||
ADDC |= Mask;
|
||||
if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
|
||||
|
@ -237,6 +237,59 @@ class InstructionVerifier;
|
||||
/// Builds BasicBlockState for each BB of the function.
|
||||
/// It can traverse function for verification and provides all required
|
||||
/// information.
|
||||
///
|
||||
/// GC pointer may be in one of three states: relocated, unrelocated and
|
||||
/// poisoned.
|
||||
/// Relocated pointer may be used without any restrictions.
|
||||
/// Unrelocated pointer cannot be dereferenced, passed as argument to any call
|
||||
/// or returned. Unrelocated pointer may be safely compared against another
|
||||
/// unrelocated pointer or against a pointer exclusively derived from null.
|
||||
/// Poisoned pointers are produced when we somehow derive pointer from relocated
|
||||
/// and unrelocated pointers (e.g. phi, select). This pointers may be safely
|
||||
/// used in a very limited number of situations. Currently the only way to use
|
||||
/// it is comparison against constant exclusively derived from null. All
|
||||
/// limitations arise due to their undefined state: this pointers should be
|
||||
/// treated as relocated and unrelocated simultaneously.
|
||||
/// Rules of deriving:
|
||||
/// R + U = P - that's where the poisoned pointers come from
|
||||
/// P + X = P
|
||||
/// U + U = U
|
||||
/// R + R = R
|
||||
/// X + C = X
|
||||
/// Where "+" - any operation that somehow derive pointer, U - unrelocated,
|
||||
/// R - relocated and P - poisoned, C - constant, X - U or R or P or C or
|
||||
/// nothing (in case when "+" is unary operation).
|
||||
/// Deriving of pointers by itself is always safe.
|
||||
/// NOTE: when we are making decision on the status of instruction's result:
|
||||
/// a) for phi we need to check status of each input *at the end of
|
||||
/// corresponding predecessor BB*.
|
||||
/// b) for other instructions we need to check status of each input *at the
|
||||
/// current point*.
|
||||
///
|
||||
/// FIXME: This works fairly well except one case
|
||||
/// bb1:
|
||||
/// p = *some GC-ptr def*
|
||||
/// p1 = gep p, offset
|
||||
/// / |
|
||||
/// / |
|
||||
/// bb2: |
|
||||
/// safepoint |
|
||||
/// \ |
|
||||
/// \ |
|
||||
/// bb3:
|
||||
/// p2 = phi [p, bb2] [p1, bb1]
|
||||
/// p3 = phi [p, bb2] [p, bb1]
|
||||
/// here p and p1 is unrelocated
|
||||
/// p2 and p3 is poisoned (though they shouldn't be)
|
||||
///
|
||||
/// This leads to some weird results:
|
||||
/// cmp eq p, p2 - illegal instruction (false-positive)
|
||||
/// cmp eq p1, p2 - illegal instruction (false-positive)
|
||||
/// cmp eq p, p3 - illegal instruction (false-positive)
|
||||
/// cmp eq p, p1 - ok
|
||||
/// To fix this we need to introduce conception of generations and be able to
|
||||
/// check if two values belong to one generation or not. This way p2 will be
|
||||
/// considered to be unrelocated and no false alarm will happen.
|
||||
class GCPtrTracker {
|
||||
const Function &F;
|
||||
SpecificBumpPtrAllocator<BasicBlockState> BSAllocator;
|
||||
@ -244,6 +297,9 @@ class GCPtrTracker {
|
||||
// This set contains defs of unrelocated pointers that are proved to be legal
|
||||
// and don't need verification.
|
||||
DenseSet<const Instruction *> ValidUnrelocatedDefs;
|
||||
// This set contains poisoned defs. They can be safely ignored during
|
||||
// verification too.
|
||||
DenseSet<const Value *> PoisonedDefs;
|
||||
|
||||
public:
|
||||
GCPtrTracker(const Function &F, const DominatorTree &DT);
|
||||
@ -251,6 +307,8 @@ class GCPtrTracker {
|
||||
BasicBlockState *getBasicBlockState(const BasicBlock *BB);
|
||||
const BasicBlockState *getBasicBlockState(const BasicBlock *BB) const;
|
||||
|
||||
bool isValuePoisoned(const Value *V) const { return PoisonedDefs.count(V); }
|
||||
|
||||
/// Traverse each BB of the function and call
|
||||
/// InstructionVerifier::verifyInstruction for each possibly invalid
|
||||
/// instruction.
|
||||
@ -349,7 +407,9 @@ const BasicBlockState *GCPtrTracker::getBasicBlockState(
|
||||
}
|
||||
|
||||
bool GCPtrTracker::instructionMayBeSkipped(const Instruction *I) const {
|
||||
return ValidUnrelocatedDefs.count(I);
|
||||
// Poisoned defs are skipped since they are always safe by itself by
|
||||
// definition (for details see comment to this class).
|
||||
return ValidUnrelocatedDefs.count(I) || PoisonedDefs.count(I);
|
||||
}
|
||||
|
||||
void GCPtrTracker::verifyFunction(GCPtrTracker &&Tracker,
|
||||
@ -418,31 +478,78 @@ bool GCPtrTracker::removeValidUnrelocatedDefs(const BasicBlock *BB,
|
||||
"Passed Contribution should be from the passed BasicBlockState!");
|
||||
AvailableValueSet AvailableSet = BBS->AvailableIn;
|
||||
bool ContributionChanged = false;
|
||||
// For explanation why instructions are processed this way see
|
||||
// "Rules of deriving" in the comment to this class.
|
||||
for (const Instruction &I : *BB) {
|
||||
bool ProducesUnrelocatedPointer = false;
|
||||
if ((isa<GetElementPtrInst>(I) || isa<BitCastInst>(I)) &&
|
||||
containsGCPtrType(I.getType())) {
|
||||
// GEP/bitcast of unrelocated pointer is legal by itself but this
|
||||
// def shouldn't appear in any AvailableSet.
|
||||
bool ValidUnrelocatedPointerDef = false;
|
||||
bool PoisonedPointerDef = false;
|
||||
// TODO: `select` instructions should be handled here too.
|
||||
if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
|
||||
if (containsGCPtrType(PN->getType())) {
|
||||
// If both is true, output is poisoned.
|
||||
bool HasRelocatedInputs = false;
|
||||
bool HasUnrelocatedInputs = false;
|
||||
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
|
||||
const BasicBlock *InBB = PN->getIncomingBlock(i);
|
||||
const Value *InValue = PN->getIncomingValue(i);
|
||||
|
||||
if (isNotExclusivelyConstantDerived(InValue)) {
|
||||
if (isValuePoisoned(InValue)) {
|
||||
// If any of inputs is poisoned, output is always poisoned too.
|
||||
HasRelocatedInputs = true;
|
||||
HasUnrelocatedInputs = true;
|
||||
break;
|
||||
}
|
||||
if (BlockMap[InBB]->AvailableOut.count(InValue))
|
||||
HasRelocatedInputs = true;
|
||||
else
|
||||
HasUnrelocatedInputs = true;
|
||||
}
|
||||
}
|
||||
if (HasUnrelocatedInputs) {
|
||||
if (HasRelocatedInputs)
|
||||
PoisonedPointerDef = true;
|
||||
else
|
||||
ValidUnrelocatedPointerDef = true;
|
||||
}
|
||||
}
|
||||
} else if ((isa<GetElementPtrInst>(I) || isa<BitCastInst>(I)) &&
|
||||
containsGCPtrType(I.getType())) {
|
||||
// GEP/bitcast of unrelocated pointer is legal by itself but this def
|
||||
// shouldn't appear in any AvailableSet.
|
||||
for (const Value *V : I.operands())
|
||||
if (containsGCPtrType(V->getType()) &&
|
||||
isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V)) {
|
||||
ProducesUnrelocatedPointer = true;
|
||||
if (isValuePoisoned(V))
|
||||
PoisonedPointerDef = true;
|
||||
else
|
||||
ValidUnrelocatedPointerDef = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!ProducesUnrelocatedPointer) {
|
||||
assert(!(ValidUnrelocatedPointerDef && PoisonedPointerDef) &&
|
||||
"Value cannot be both unrelocated and poisoned!");
|
||||
if (ValidUnrelocatedPointerDef) {
|
||||
// Remove def of unrelocated pointer from Contribution of this BB and
|
||||
// trigger update of all its successors.
|
||||
Contribution.erase(&I);
|
||||
PoisonedDefs.erase(&I);
|
||||
ValidUnrelocatedDefs.insert(&I);
|
||||
DEBUG(dbgs() << "Removing urelocated " << I << " from Contribution of "
|
||||
<< BB->getName() << "\n");
|
||||
ContributionChanged = true;
|
||||
} else if (PoisonedPointerDef) {
|
||||
// Mark pointer as poisoned, remove its def from Contribution and trigger
|
||||
// update of all successors.
|
||||
Contribution.erase(&I);
|
||||
PoisonedDefs.insert(&I);
|
||||
DEBUG(dbgs() << "Removing poisoned " << I << " from Contribution of "
|
||||
<< BB->getName() << "\n");
|
||||
ContributionChanged = true;
|
||||
} else {
|
||||
bool Cleared = false;
|
||||
transferInstruction(I, Cleared, AvailableSet);
|
||||
(void)Cleared;
|
||||
} else {
|
||||
// Remove def of unrelocated pointer from Contribution of this BB
|
||||
// and trigger update of all its successors.
|
||||
Contribution.erase(&I);
|
||||
ValidUnrelocatedDefs.insert(&I);
|
||||
DEBUG(dbgs() << "Removing " << I << " from Contribution of "
|
||||
<< BB->getName() << "\n");
|
||||
ContributionChanged = true;
|
||||
}
|
||||
}
|
||||
return ContributionChanged;
|
||||
@ -524,8 +631,8 @@ void InstructionVerifier::verifyInstruction(
|
||||
|
||||
// Returns true if LHS and RHS are unrelocated pointers and they are
|
||||
// valid unrelocated uses.
|
||||
auto hasValidUnrelocatedUse = [&AvailableSet, baseTyLHS, baseTyRHS, &LHS,
|
||||
&RHS] () {
|
||||
auto hasValidUnrelocatedUse = [&AvailableSet, Tracker, baseTyLHS, baseTyRHS,
|
||||
&LHS, &RHS] () {
|
||||
// A cmp instruction has valid unrelocated pointer operands only if
|
||||
// both operands are unrelocated pointers.
|
||||
// In the comparison between two pointers, if one is an unrelocated
|
||||
@ -545,12 +652,23 @@ void InstructionVerifier::verifyInstruction(
|
||||
(baseTyLHS == BaseType::NonConstant &&
|
||||
baseTyRHS == BaseType::ExclusivelySomeConstant))
|
||||
return false;
|
||||
|
||||
// If one of pointers is poisoned and other is not exclusively derived
|
||||
// from null it is an invalid expression: it produces poisoned result
|
||||
// and unless we want to track all defs (not only gc pointers) the only
|
||||
// option is to prohibit such instructions.
|
||||
if ((Tracker->isValuePoisoned(LHS) && baseTyRHS != ExclusivelyNull) ||
|
||||
(Tracker->isValuePoisoned(RHS) && baseTyLHS != ExclusivelyNull))
|
||||
return false;
|
||||
|
||||
// All other cases are valid cases enumerated below:
|
||||
// 1. Comparison between an exlusively derived null pointer and a
|
||||
// 1. Comparison between an exclusively derived null pointer and a
|
||||
// constant base pointer.
|
||||
// 2. Comparison between an exlusively derived null pointer and a
|
||||
// 2. Comparison between an exclusively derived null pointer and a
|
||||
// non-constant unrelocated base pointer.
|
||||
// 3. Comparison between 2 unrelocated pointers.
|
||||
// 4. Comparison between a pointer exclusively derived from null and a
|
||||
// non-constant poisoned pointer.
|
||||
return true;
|
||||
};
|
||||
if (!hasValidUnrelocatedUse()) {
|
||||
|
@ -76,7 +76,7 @@ static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
|
||||
if (TempDir.empty())
|
||||
return;
|
||||
// User asked to save temps, let dump the bitcode file after import.
|
||||
std::string SaveTempPath = (TempDir + llvm::utostr(count) + Suffix).str();
|
||||
std::string SaveTempPath = (TempDir + llvm::Twine(count) + Suffix).str();
|
||||
std::error_code EC;
|
||||
raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
|
||||
if (EC)
|
||||
|
@ -427,13 +427,13 @@ bool ELFAsmParser::parseGroup(StringRef &GroupName) {
|
||||
GroupName = getTok().getString();
|
||||
Lex();
|
||||
} else if (getParser().parseIdentifier(GroupName)) {
|
||||
return true;
|
||||
return TokError("invalid group name");
|
||||
}
|
||||
if (L.is(AsmToken::Comma)) {
|
||||
Lex();
|
||||
StringRef Linkage;
|
||||
if (getParser().parseIdentifier(Linkage))
|
||||
return true;
|
||||
return TokError("invalid linkage");
|
||||
if (Linkage != "comdat")
|
||||
return TokError("Linkage must be 'comdat'");
|
||||
}
|
||||
|
@ -628,7 +628,7 @@ CoverageMapping::getInstantiationGroups(StringRef Filename) const {
|
||||
}
|
||||
|
||||
std::vector<InstantiationGroup> Result;
|
||||
for (const auto &InstantiationSet : InstantiationSetCollector) {
|
||||
for (auto &InstantiationSet : InstantiationSetCollector) {
|
||||
InstantiationGroup IG{InstantiationSet.first.first,
|
||||
InstantiationSet.first.second,
|
||||
std::move(InstantiationSet.second)};
|
||||
|
@ -666,7 +666,7 @@ void ARMAttributeParser::ParseSubsection(const uint8_t *Data, uint32_t Length) {
|
||||
ParseIndexList(Data, Offset, Indicies);
|
||||
break;
|
||||
default:
|
||||
errs() << "unrecognised tag: 0x" << utohexstr(Tag) << '\n';
|
||||
errs() << "unrecognised tag: 0x" << Twine::utohexstr(Tag) << '\n';
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -688,7 +688,9 @@ static bool EatsUnboundedNumberOfValues(const Option *O) {
|
||||
O->getNumOccurrencesFlag() == cl::OneOrMore;
|
||||
}
|
||||
|
||||
static bool isWhitespace(char C) { return strchr(" \t\n\r\f\v", C); }
|
||||
static bool isWhitespace(char C) {
|
||||
return C == ' ' || C == '\t' || C == '\r' || C == '\n';
|
||||
}
|
||||
|
||||
static bool isQuote(char C) { return C == '\"' || C == '\''; }
|
||||
|
||||
@ -709,17 +711,19 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
|
||||
break;
|
||||
}
|
||||
|
||||
char C = Src[I];
|
||||
|
||||
// Backslash escapes the next character.
|
||||
if (I + 1 < E && Src[I] == '\\') {
|
||||
if (I + 1 < E && C == '\\') {
|
||||
++I; // Skip the escape.
|
||||
Token.push_back(Src[I]);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Consume a quoted string.
|
||||
if (isQuote(Src[I])) {
|
||||
char Quote = Src[I++];
|
||||
while (I != E && Src[I] != Quote) {
|
||||
if (isQuote(C)) {
|
||||
++I;
|
||||
while (I != E && Src[I] != C) {
|
||||
// Backslash escapes the next character.
|
||||
if (Src[I] == '\\' && I + 1 != E)
|
||||
++I;
|
||||
@ -732,7 +736,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
|
||||
}
|
||||
|
||||
// End the token if this is whitespace.
|
||||
if (isWhitespace(Src[I])) {
|
||||
if (isWhitespace(C)) {
|
||||
if (!Token.empty())
|
||||
NewArgv.push_back(Saver.save(StringRef(Token)).data());
|
||||
Token.clear();
|
||||
@ -740,7 +744,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
|
||||
}
|
||||
|
||||
// This is a normal character. Append it.
|
||||
Token.push_back(Src[I]);
|
||||
Token.push_back(C);
|
||||
}
|
||||
|
||||
// Append the last token after hitting EOF with no whitespace.
|
||||
@ -798,25 +802,27 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
|
||||
// end of the source string.
|
||||
enum { INIT, UNQUOTED, QUOTED } State = INIT;
|
||||
for (size_t I = 0, E = Src.size(); I != E; ++I) {
|
||||
char C = Src[I];
|
||||
|
||||
// INIT state indicates that the current input index is at the start of
|
||||
// the string or between tokens.
|
||||
if (State == INIT) {
|
||||
if (isWhitespace(Src[I])) {
|
||||
if (isWhitespace(C)) {
|
||||
// Mark the end of lines in response files
|
||||
if (MarkEOLs && Src[I] == '\n')
|
||||
if (MarkEOLs && C == '\n')
|
||||
NewArgv.push_back(nullptr);
|
||||
continue;
|
||||
}
|
||||
if (Src[I] == '"') {
|
||||
if (C == '"') {
|
||||
State = QUOTED;
|
||||
continue;
|
||||
}
|
||||
if (Src[I] == '\\') {
|
||||
if (C == '\\') {
|
||||
I = parseBackslash(Src, I, Token);
|
||||
State = UNQUOTED;
|
||||
continue;
|
||||
}
|
||||
Token.push_back(Src[I]);
|
||||
Token.push_back(C);
|
||||
State = UNQUOTED;
|
||||
continue;
|
||||
}
|
||||
@ -825,38 +831,38 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
|
||||
// quotes.
|
||||
if (State == UNQUOTED) {
|
||||
// Whitespace means the end of the token.
|
||||
if (isWhitespace(Src[I])) {
|
||||
if (isWhitespace(C)) {
|
||||
NewArgv.push_back(Saver.save(StringRef(Token)).data());
|
||||
Token.clear();
|
||||
State = INIT;
|
||||
// Mark the end of lines in response files
|
||||
if (MarkEOLs && Src[I] == '\n')
|
||||
if (MarkEOLs && C == '\n')
|
||||
NewArgv.push_back(nullptr);
|
||||
continue;
|
||||
}
|
||||
if (Src[I] == '"') {
|
||||
if (C == '"') {
|
||||
State = QUOTED;
|
||||
continue;
|
||||
}
|
||||
if (Src[I] == '\\') {
|
||||
if (C == '\\') {
|
||||
I = parseBackslash(Src, I, Token);
|
||||
continue;
|
||||
}
|
||||
Token.push_back(Src[I]);
|
||||
Token.push_back(C);
|
||||
continue;
|
||||
}
|
||||
|
||||
// QUOTED state means that it's reading a token quoted by double quotes.
|
||||
if (State == QUOTED) {
|
||||
if (Src[I] == '"') {
|
||||
if (C == '"') {
|
||||
State = UNQUOTED;
|
||||
continue;
|
||||
}
|
||||
if (Src[I] == '\\') {
|
||||
if (C == '\\') {
|
||||
I = parseBackslash(Src, I, Token);
|
||||
continue;
|
||||
}
|
||||
Token.push_back(Src[I]);
|
||||
Token.push_back(C);
|
||||
}
|
||||
}
|
||||
// Append the last token after hitting EOF with no whitespace.
|
||||
|
@ -110,7 +110,7 @@ int llvm::TableGenMain(char *argv0, TableGenMainFn *MainFn) {
|
||||
return 1;
|
||||
|
||||
if (ErrorsPrinted > 0)
|
||||
return reportError(argv0, utostr(ErrorsPrinted) + " errors.\n");
|
||||
return reportError(argv0, Twine(ErrorsPrinted) + " errors.\n");
|
||||
|
||||
// Declare success.
|
||||
Out.keep();
|
||||
|
@ -3673,15 +3673,6 @@ static bool getFMAPatterns(MachineInstr &Root,
|
||||
}
|
||||
break;
|
||||
case AArch64::FSUBv2f32:
|
||||
if (canCombineWithFMUL(MBB, Root.getOperand(1),
|
||||
AArch64::FMULv2i32_indexed)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
|
||||
Found = true;
|
||||
} else if (canCombineWithFMUL(MBB, Root.getOperand(1),
|
||||
AArch64::FMULv2f32)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
|
||||
Found = true;
|
||||
}
|
||||
if (canCombineWithFMUL(MBB, Root.getOperand(2),
|
||||
AArch64::FMULv2i32_indexed)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
|
||||
@ -3691,17 +3682,17 @@ static bool getFMAPatterns(MachineInstr &Root,
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
|
||||
Found = true;
|
||||
}
|
||||
break;
|
||||
case AArch64::FSUBv2f64:
|
||||
if (canCombineWithFMUL(MBB, Root.getOperand(1),
|
||||
AArch64::FMULv2i64_indexed)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
|
||||
AArch64::FMULv2i32_indexed)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
|
||||
Found = true;
|
||||
} else if (canCombineWithFMUL(MBB, Root.getOperand(1),
|
||||
AArch64::FMULv2f64)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
|
||||
AArch64::FMULv2f32)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
|
||||
Found = true;
|
||||
}
|
||||
break;
|
||||
case AArch64::FSUBv2f64:
|
||||
if (canCombineWithFMUL(MBB, Root.getOperand(2),
|
||||
AArch64::FMULv2i64_indexed)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
|
||||
@ -3711,17 +3702,17 @@ static bool getFMAPatterns(MachineInstr &Root,
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
|
||||
Found = true;
|
||||
}
|
||||
break;
|
||||
case AArch64::FSUBv4f32:
|
||||
if (canCombineWithFMUL(MBB, Root.getOperand(1),
|
||||
AArch64::FMULv4i32_indexed)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
|
||||
AArch64::FMULv2i64_indexed)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
|
||||
Found = true;
|
||||
} else if (canCombineWithFMUL(MBB, Root.getOperand(1),
|
||||
AArch64::FMULv4f32)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
|
||||
AArch64::FMULv2f64)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
|
||||
Found = true;
|
||||
}
|
||||
break;
|
||||
case AArch64::FSUBv4f32:
|
||||
if (canCombineWithFMUL(MBB, Root.getOperand(2),
|
||||
AArch64::FMULv4i32_indexed)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
|
||||
@ -3731,6 +3722,15 @@ static bool getFMAPatterns(MachineInstr &Root,
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
|
||||
Found = true;
|
||||
}
|
||||
if (canCombineWithFMUL(MBB, Root.getOperand(1),
|
||||
AArch64::FMULv4i32_indexed)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
|
||||
Found = true;
|
||||
} else if (canCombineWithFMUL(MBB, Root.getOperand(1),
|
||||
AArch64::FMULv4f32)) {
|
||||
Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
|
||||
Found = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return Found;
|
||||
@ -5062,4 +5062,4 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
|
||||
It = MBB.insert(It, LDRXpost);
|
||||
|
||||
return It;
|
||||
}
|
||||
}
|
||||
|
@ -1797,11 +1797,7 @@ void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
|
||||
llvm_unreachable("unsupported fp type");
|
||||
|
||||
APInt API = APF.bitcastToAPInt();
|
||||
std::string hexstr(utohexstr(API.getZExtValue()));
|
||||
O << lead;
|
||||
if (hexstr.length() < numHex)
|
||||
O << std::string(numHex - hexstr.length(), '0');
|
||||
O << utohexstr(API.getZExtValue());
|
||||
O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
|
||||
}
|
||||
|
||||
void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/MC/MCAssembler.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/Support/Format.h"
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "nvptx-mcexpr"
|
||||
@ -47,10 +48,7 @@ void NVPTXFloatMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
|
||||
}
|
||||
|
||||
APInt API = APF.bitcastToAPInt();
|
||||
std::string HexStr(utohexstr(API.getZExtValue()));
|
||||
if (HexStr.length() < NumHex)
|
||||
OS << std::string(NumHex - HexStr.length(), '0');
|
||||
OS << utohexstr(API.getZExtValue());
|
||||
OS << format_hex_no_prefix(API.getZExtValue(), NumHex, /*Upper=*/true);
|
||||
}
|
||||
|
||||
const NVPTXGenericMCSymbolRefExpr*
|
||||
|
@ -190,7 +190,7 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
|
||||
if (isVerbose()) {
|
||||
OutStreamer->AddComment("fallthrough-return: $pop" +
|
||||
utostr(MFI->getWARegStackId(
|
||||
Twine(MFI->getWARegStackId(
|
||||
MFI->getWAReg(MI->getOperand(0).getReg()))));
|
||||
OutStreamer->AddBlankLine();
|
||||
}
|
||||
|
@ -2377,10 +2377,11 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
|
||||
Flags |= Prefix;
|
||||
Name = Parser.getTok().getString();
|
||||
Parser.Lex(); // eat the prefix
|
||||
// Hack: we could have something like
|
||||
// Hack: we could have something like "rep # some comment" or
|
||||
// "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
|
||||
while (Name.startswith(";") || Name.startswith("\n") ||
|
||||
Name.startswith("\t") || Name.startswith("/")) {
|
||||
Name.startswith("#") || Name.startswith("\t") ||
|
||||
Name.startswith("/")) {
|
||||
Name = Parser.getTok().getString();
|
||||
Parser.Lex(); // go to next prefix or instr
|
||||
}
|
||||
|
@ -739,7 +739,8 @@ def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [
|
||||
FeatureVNNI,
|
||||
FeatureVPCLMULQDQ,
|
||||
FeatureVPOPCNTDQ,
|
||||
FeatureGFNI
|
||||
FeatureGFNI,
|
||||
FeatureCLWB
|
||||
]>;
|
||||
|
||||
class IcelakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
|
||||
|
@ -1310,8 +1310,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
|
||||
|
||||
setOperationAction(ISD::MUL, MVT::v8i64, Legal);
|
||||
}
|
||||
|
||||
if (Subtarget.hasCDI()) {
|
||||
@ -1388,8 +1386,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
setOperationAction(ISD::UINT_TO_FP, VT, Legal);
|
||||
setOperationAction(ISD::FP_TO_SINT, VT, Legal);
|
||||
setOperationAction(ISD::FP_TO_UINT, VT, Legal);
|
||||
|
||||
setOperationAction(ISD::MUL, VT, Legal);
|
||||
}
|
||||
}
|
||||
|
||||
@ -7108,8 +7104,8 @@ static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
|
||||
return DAG.getConstant(Immediate, dl, VT);
|
||||
}
|
||||
// Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
|
||||
SDValue
|
||||
X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
|
||||
static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget) {
|
||||
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
assert((VT.getVectorElementType() == MVT::i1) &&
|
||||
@ -7131,8 +7127,8 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
|
||||
DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(32, 32));
|
||||
// We have to manually lower both halves so getNode doesn't try to
|
||||
// reassemble the build_vector.
|
||||
Lower = LowerBUILD_VECTORvXi1(Lower, DAG);
|
||||
Upper = LowerBUILD_VECTORvXi1(Upper, DAG);
|
||||
Lower = LowerBUILD_VECTORvXi1(Lower, DAG, Subtarget);
|
||||
Upper = LowerBUILD_VECTORvXi1(Upper, DAG, Subtarget);
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lower, Upper);
|
||||
}
|
||||
SDValue Imm = ConvertI1VectorToInteger(Op, DAG);
|
||||
@ -7881,7 +7877,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
// Generate vectors for predicate vectors.
|
||||
if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())
|
||||
return LowerBUILD_VECTORvXi1(Op, DAG);
|
||||
return LowerBUILD_VECTORvXi1(Op, DAG, Subtarget);
|
||||
|
||||
if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget))
|
||||
return VectorConstant;
|
||||
@ -15658,8 +15654,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
|
||||
}
|
||||
|
||||
/// 64-bit unsigned integer to double expansion.
|
||||
SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget) {
|
||||
// This algorithm is not obvious. Here it is what we're trying to output:
|
||||
/*
|
||||
movq %rax, %xmm0
|
||||
@ -15679,7 +15675,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
|
||||
// Build some magic constants.
|
||||
static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
|
||||
Constant *C0 = ConstantDataVector::get(*Context, CV0);
|
||||
auto PtrVT = getPointerTy(DAG.getDataLayout());
|
||||
auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
|
||||
SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, 16);
|
||||
|
||||
SmallVector<Constant*,2> CV1;
|
||||
@ -15726,8 +15722,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
|
||||
}
|
||||
|
||||
/// 32-bit unsigned integer to float expansion.
|
||||
SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget) {
|
||||
SDLoc dl(Op);
|
||||
// FP constant to bias correct the final result.
|
||||
SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl,
|
||||
@ -15760,16 +15756,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
|
||||
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
|
||||
|
||||
// Handle final rounding.
|
||||
MVT DestVT = Op.getSimpleValueType();
|
||||
|
||||
if (DestVT.bitsLT(MVT::f64))
|
||||
return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
if (DestVT.bitsGT(MVT::f64))
|
||||
return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
|
||||
|
||||
// Handle final rounding.
|
||||
return Sub;
|
||||
return DAG.getFPExtendOrRound(Sub, dl, Op.getSimpleValueType());
|
||||
}
|
||||
|
||||
static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
|
||||
@ -15901,8 +15888,8 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
|
||||
return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh);
|
||||
}
|
||||
|
||||
SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget) {
|
||||
SDValue N0 = Op.getOperand(0);
|
||||
MVT SrcVT = N0.getSimpleValueType();
|
||||
SDLoc dl(Op);
|
||||
@ -15941,7 +15928,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
|
||||
auto PtrVT = getPointerTy(DAG.getDataLayout());
|
||||
|
||||
if (Op.getSimpleValueType().isVector())
|
||||
return lowerUINT_TO_FP_vec(Op, DAG);
|
||||
return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);
|
||||
|
||||
MVT SrcVT = N0.getSimpleValueType();
|
||||
MVT DstVT = Op.getSimpleValueType();
|
||||
@ -15954,9 +15941,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
|
||||
}
|
||||
|
||||
if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
|
||||
return LowerUINT_TO_FP_i64(Op, DAG);
|
||||
return LowerUINT_TO_FP_i64(Op, DAG, Subtarget);
|
||||
if (SrcVT == MVT::i32 && X86ScalarSSEf64)
|
||||
return LowerUINT_TO_FP_i32(Op, DAG);
|
||||
return LowerUINT_TO_FP_i32(Op, DAG, Subtarget);
|
||||
if (Subtarget.is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32)
|
||||
return SDValue();
|
||||
|
||||
@ -22097,7 +22084,14 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
|
||||
// Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle.
|
||||
if (VT == MVT::v4i32) {
|
||||
assert(Subtarget.hasSSE2() && !Subtarget.hasSSE41() &&
|
||||
"Should not custom lower when pmuldq is available!");
|
||||
"Should not custom lower when pmulld is available!");
|
||||
|
||||
// If the upper 17 bits of each element are zero then we can use PMADD.
|
||||
APInt Mask17 = APInt::getHighBitsSet(32, 17);
|
||||
if (DAG.MaskedValueIsZero(A, Mask17) && DAG.MaskedValueIsZero(B, Mask17))
|
||||
return DAG.getNode(X86ISD::VPMADDWD, dl, VT,
|
||||
DAG.getBitcast(MVT::v8i16, A),
|
||||
DAG.getBitcast(MVT::v8i16, B));
|
||||
|
||||
// Extract the odd parts.
|
||||
static const int UnpackMask[] = { 1, -1, 3, -1 };
|
||||
@ -22149,6 +22143,11 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
|
||||
bool AHiIsZero = DAG.MaskedValueIsZero(A, UpperBitsMask);
|
||||
bool BHiIsZero = DAG.MaskedValueIsZero(B, UpperBitsMask);
|
||||
|
||||
// If DQI is supported we can use MULLQ, but MULUDQ is still better if the
|
||||
// the high bits are known to be zero.
|
||||
if (Subtarget.hasDQI() && (!AHiIsZero || !BHiIsZero))
|
||||
return Op;
|
||||
|
||||
// Bit cast to 32-bit vectors for MULUDQ.
|
||||
SDValue Alo = DAG.getBitcast(MulVT, A);
|
||||
SDValue Blo = DAG.getBitcast(MulVT, B);
|
||||
@ -31012,8 +31011,8 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), Vals[IdxVal]);
|
||||
}
|
||||
|
||||
// The replacement was made in place; don't return anything.
|
||||
return SDValue();
|
||||
// The replacement was made in place; return N so it won't be revisited.
|
||||
return SDValue(N, 0);
|
||||
}
|
||||
|
||||
/// If a vector select has an operand that is -1 or 0, try to simplify the
|
||||
@ -32267,6 +32266,13 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
|
||||
if ((NumElts % 2) != 0)
|
||||
return SDValue();
|
||||
|
||||
// If the upper 17 bits of each element are zero then we can use PMADD.
|
||||
APInt Mask17 = APInt::getHighBitsSet(32, 17);
|
||||
if (VT == MVT::v4i32 && DAG.MaskedValueIsZero(N0, Mask17) &&
|
||||
DAG.MaskedValueIsZero(N1, Mask17))
|
||||
return DAG.getNode(X86ISD::VPMADDWD, DL, VT, DAG.getBitcast(MVT::v8i16, N0),
|
||||
DAG.getBitcast(MVT::v8i16, N1));
|
||||
|
||||
unsigned RegSize = 128;
|
||||
MVT OpsVT = MVT::getVectorVT(MVT::i16, RegSize / 16);
|
||||
EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts);
|
||||
@ -34882,7 +34888,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
|
||||
// X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - its
|
||||
// better to truncate if we have the chance.
|
||||
if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) &&
|
||||
!TLI.isOperationLegal(Opcode, SrcVT))
|
||||
!Subtarget.hasDQI())
|
||||
return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
|
||||
LLVM_FALLTHROUGH;
|
||||
case ISD::ADD: {
|
||||
|
@ -1167,7 +1167,6 @@ namespace llvm {
|
||||
bool isReplace) const;
|
||||
|
||||
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
||||
@ -1183,9 +1182,6 @@ namespace llvm {
|
||||
|
||||
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
@ -4420,12 +4420,12 @@ defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
|
||||
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
|
||||
SSE_INTALU_ITINS_P, HasBWI, 0>;
|
||||
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
|
||||
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
|
||||
SSE_INTMUL_ITINS_P, HasAVX512, 1>, T8PD;
|
||||
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
|
||||
SSE_INTALU_ITINS_P, HasBWI, 1>;
|
||||
SSE_INTMUL_ITINS_P, HasBWI, 1>;
|
||||
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
|
||||
SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
|
||||
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P,
|
||||
SSE_INTMUL_ITINS_P, HasDQI, 1>, T8PD;
|
||||
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTMUL_ITINS_P,
|
||||
HasBWI, 1>;
|
||||
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P,
|
||||
HasBWI, 1>;
|
||||
@ -4454,7 +4454,7 @@ multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
|
||||
}
|
||||
}
|
||||
|
||||
defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
|
||||
defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTMUL_ITINS_P,
|
||||
avx512vl_i32_info, avx512vl_i64_info,
|
||||
X86pmuldq, HasAVX512, 1>,T8PD;
|
||||
defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
|
||||
|
@ -3734,7 +3734,7 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
|
||||
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))], itins.rr>,
|
||||
Sched<[itins.Sched]>;
|
||||
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86memop:$src2),
|
||||
@ -3742,8 +3742,8 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
|
||||
(bitconvert (memop_frag addr:$src2)))))]>,
|
||||
Sched<[itins.Sched.Folded, ReadAfterLd]>;
|
||||
(bitconvert (memop_frag addr:$src2)))))],
|
||||
itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
|
||||
}
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
@ -6313,7 +6313,7 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
|
||||
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
|
||||
Sched<[itins.Sched]>;
|
||||
def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86memop:$src2),
|
||||
@ -6321,8 +6321,8 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set RC:$dst,
|
||||
(OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>,
|
||||
Sched<[itins.Sched.Folded, ReadAfterLd]>;
|
||||
(OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))],
|
||||
itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
|
||||
}
|
||||
|
||||
/// SS48I_binop_rm2 - Simple SSE41 binary operator with different src and dst
|
||||
@ -6338,7 +6338,7 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
|
||||
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))], itins.rr>,
|
||||
Sched<[itins.Sched]>;
|
||||
def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86memop:$src2),
|
||||
@ -6346,8 +6346,8 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
|
||||
(bitconvert (memop_frag addr:$src2)))))]>,
|
||||
Sched<[itins.Sched.Folded, ReadAfterLd]>;
|
||||
(bitconvert (memop_frag addr:$src2)))))],
|
||||
itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
@ -6924,14 +6924,15 @@ multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, Sched<[itins.Sched]>;
|
||||
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
|
||||
Sched<[itins.Sched]>;
|
||||
def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86memop:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set RC:$dst,
|
||||
(OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
|
||||
(OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))], itins.rm>,
|
||||
Sched<[itins.Sched.Folded, ReadAfterLd]>;
|
||||
}
|
||||
|
||||
|
@ -181,8 +181,9 @@ class SampleProfileLoader {
|
||||
StringRef Name, bool IsThinLTOPreLink,
|
||||
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
|
||||
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
|
||||
: GetAC(GetAssumptionCache), GetTTI(GetTargetTransformInfo),
|
||||
Filename(Name), IsThinLTOPreLink(IsThinLTOPreLink) {}
|
||||
: GetAC(std::move(GetAssumptionCache)),
|
||||
GetTTI(std::move(GetTargetTransformInfo)), Filename(Name),
|
||||
IsThinLTOPreLink(IsThinLTOPreLink) {}
|
||||
|
||||
bool doInitialization(Module &M);
|
||||
bool runOnModule(Module &M, ModuleAnalysisManager *AM);
|
||||
@ -1547,14 +1548,14 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM) {
|
||||
|
||||
// Populate the symbol map.
|
||||
for (const auto &N_F : M.getValueSymbolTable()) {
|
||||
std::string OrigName = N_F.getKey();
|
||||
StringRef OrigName = N_F.getKey();
|
||||
Function *F = dyn_cast<Function>(N_F.getValue());
|
||||
if (F == nullptr)
|
||||
continue;
|
||||
SymbolMap[OrigName] = F;
|
||||
auto pos = OrigName.find('.');
|
||||
if (pos != std::string::npos) {
|
||||
std::string NewName = OrigName.substr(0, pos);
|
||||
if (pos != StringRef::npos) {
|
||||
StringRef NewName = OrigName.substr(0, pos);
|
||||
auto r = SymbolMap.insert(std::make_pair(NewName, F));
|
||||
// Failiing to insert means there is already an entry in SymbolMap,
|
||||
// thus there are multiple functions that are mapped to the same
|
||||
|
@ -90,8 +90,7 @@ void promoteTypeIds(Module &M, StringRef ModuleId) {
|
||||
if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
|
||||
Metadata *&GlobalMD = LocalToGlobal[MD];
|
||||
if (!GlobalMD) {
|
||||
std::string NewName =
|
||||
(to_string(LocalToGlobal.size()) + ModuleId).str();
|
||||
std::string NewName = (Twine(LocalToGlobal.size()) + ModuleId).str();
|
||||
GlobalMD = MDString::get(M.getContext(), NewName);
|
||||
}
|
||||
|
||||
|
@ -1802,9 +1802,7 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
|
||||
/// instructions. For normal calls, it allows visitCallSite to do the heavy
|
||||
/// lifting.
|
||||
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
auto Args = CI.arg_operands();
|
||||
if (Value *V = SimplifyCall(&CI, CI.getCalledValue(), Args.begin(),
|
||||
Args.end(), SQ.getWithInstruction(&CI)))
|
||||
if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))
|
||||
return replaceInstUsesWith(CI, V);
|
||||
|
||||
if (isFreeCall(&CI, &TLI))
|
||||
@ -1903,16 +1901,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
|
||||
return replaceInstUsesWith(CI, N);
|
||||
return nullptr;
|
||||
|
||||
case Intrinsic::bswap: {
|
||||
Value *IIOperand = II->getArgOperand(0);
|
||||
Value *X = nullptr;
|
||||
|
||||
// TODO should this be in InstSimplify?
|
||||
// bswap(bswap(x)) -> x
|
||||
if (match(IIOperand, m_BSwap(m_Value(X))))
|
||||
return replaceInstUsesWith(CI, X);
|
||||
|
||||
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
|
||||
if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
|
||||
unsigned C = X->getType()->getPrimitiveSizeInBits() -
|
||||
@ -1923,18 +1915,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case Intrinsic::bitreverse: {
|
||||
Value *IIOperand = II->getArgOperand(0);
|
||||
Value *X = nullptr;
|
||||
|
||||
// TODO should this be in InstSimplify?
|
||||
// bitreverse(bitreverse(x)) -> x
|
||||
if (match(IIOperand, m_BitReverse(m_Value(X))))
|
||||
return replaceInstUsesWith(CI, X);
|
||||
break;
|
||||
}
|
||||
|
||||
case Intrinsic::masked_load:
|
||||
if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder))
|
||||
return replaceInstUsesWith(CI, SimplifiedMaskedOp);
|
||||
@ -1948,16 +1928,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
|
||||
case Intrinsic::powi:
|
||||
if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
|
||||
// powi(x, 0) -> 1.0
|
||||
if (Power->isZero())
|
||||
return replaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
|
||||
// powi(x, 1) -> x
|
||||
if (Power->isOne())
|
||||
return replaceInstUsesWith(CI, II->getArgOperand(0));
|
||||
// 0 and 1 are handled in instsimplify
|
||||
|
||||
// powi(x, -1) -> 1/x
|
||||
if (Power->isMinusOne())
|
||||
return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
|
||||
II->getArgOperand(0));
|
||||
// powi(x, 2) -> x*x
|
||||
if (Power->equalsInt(2))
|
||||
return BinaryOperator::CreateFMul(II->getArgOperand(0),
|
||||
II->getArgOperand(0));
|
||||
}
|
||||
break;
|
||||
|
||||
@ -2396,7 +2376,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
// The compare intrinsic uses the above assumptions and therefore
|
||||
// doesn't require additional flags.
|
||||
if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) &&
|
||||
match(Arg1, m_Zero()) &&
|
||||
match(Arg1, m_Zero()) && isa<Instruction>(Arg0) &&
|
||||
cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) {
|
||||
if (Arg0IsZero)
|
||||
std::swap(A, B);
|
||||
|
@ -1631,9 +1631,5 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
|
||||
SQ.getWithInstruction(&I)))
|
||||
return replaceInstUsesWith(I, V);
|
||||
|
||||
// Handle cases involving: rem X, (select Cond, Y, Z)
|
||||
if (simplifyDivRemOfSelectWithZeroOp(I))
|
||||
return &I;
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -181,11 +181,13 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
|
||||
// If extracting a specified index from the vector, see if we can recursively
|
||||
// find a previously computed scalar that was inserted into the vector.
|
||||
if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
|
||||
unsigned IndexVal = IdxC->getZExtValue();
|
||||
unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
|
||||
|
||||
// InstSimplify handles cases where the index is invalid.
|
||||
assert(IndexVal < VectorWidth);
|
||||
// InstSimplify should handle cases where the index is invalid.
|
||||
if (!IdxC->getValue().ule(VectorWidth))
|
||||
return nullptr;
|
||||
|
||||
unsigned IndexVal = IdxC->getZExtValue();
|
||||
|
||||
// This instruction only demands the single element from the input vector.
|
||||
// If the input vector has a single use, simplify it based on this use
|
||||
|
@ -2702,9 +2702,10 @@ void FunctionStackPoisoner::copyArgsPassedByValToAllocas() {
|
||||
unsigned Align = Arg.getParamAlignment();
|
||||
if (Align == 0) Align = DL.getABITypeAlignment(Ty);
|
||||
|
||||
const std::string &Name = Arg.hasName() ? Arg.getName().str() :
|
||||
"Arg" + llvm::to_string(Arg.getArgNo());
|
||||
AllocaInst *AI = IRB.CreateAlloca(Ty, nullptr, Twine(Name) + ".byval");
|
||||
AllocaInst *AI = IRB.CreateAlloca(
|
||||
Ty, nullptr,
|
||||
(Arg.hasName() ? Arg.getName() : "Arg" + Twine(Arg.getArgNo())) +
|
||||
".byval");
|
||||
AI->setAlignment(Align);
|
||||
Arg.replaceAllUsesWith(AI);
|
||||
|
||||
|
@ -641,7 +641,7 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
|
||||
DenseMap<uint32_t, unsigned> VNums;
|
||||
for (auto *I : Insts) {
|
||||
uint32_t N = VN.lookupOrAdd(I);
|
||||
DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n");
|
||||
DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
|
||||
if (N == ~0U)
|
||||
return None;
|
||||
VNums[N]++;
|
||||
|
@ -476,33 +476,22 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
|
||||
Alignment = DL.getABITypeAlignment(EltType);
|
||||
}
|
||||
|
||||
// Remember the debug location.
|
||||
DebugLoc Loc;
|
||||
if (!Range.TheStores.empty())
|
||||
Loc = Range.TheStores[0]->getDebugLoc();
|
||||
AMemSet =
|
||||
Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
|
||||
|
||||
DEBUG(dbgs() << "Replace stores:\n";
|
||||
for (Instruction *SI : Range.TheStores)
|
||||
dbgs() << *SI << '\n');
|
||||
dbgs() << *SI << '\n';
|
||||
dbgs() << "With: " << *AMemSet << '\n');
|
||||
|
||||
if (!Range.TheStores.empty())
|
||||
AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
|
||||
|
||||
// Zap all the stores.
|
||||
for (Instruction *SI : Range.TheStores) {
|
||||
MD->removeInstruction(SI);
|
||||
SI->eraseFromParent();
|
||||
}
|
||||
|
||||
// Create the memset after removing the stores, so that if there any cached
|
||||
// non-local dependencies on the removed instructions in
|
||||
// MemoryDependenceAnalysis, the cache entries are updated to "dirty"
|
||||
// entries pointing below the memset, so subsequent queries include the
|
||||
// memset.
|
||||
AMemSet =
|
||||
Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
|
||||
if (!Range.TheStores.empty())
|
||||
AMemSet->setDebugLoc(Loc);
|
||||
|
||||
DEBUG(dbgs() << "With: " << *AMemSet << '\n');
|
||||
|
||||
++NumMemSetInfer;
|
||||
}
|
||||
|
||||
@ -1042,22 +1031,9 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
|
||||
//
|
||||
// NOTE: This is conservative, it will stop on any read from the source loc,
|
||||
// not just the defining memcpy.
|
||||
MemoryLocation SourceLoc = MemoryLocation::getForSource(MDep);
|
||||
MemDepResult SourceDep = MD->getPointerDependencyFrom(SourceLoc, false,
|
||||
M->getIterator(), M->getParent());
|
||||
|
||||
if (SourceDep.isNonLocal()) {
|
||||
SmallVector<NonLocalDepResult, 2> NonLocalDepResults;
|
||||
MD->getNonLocalPointerDependencyFrom(M, SourceLoc, /*isLoad=*/false,
|
||||
NonLocalDepResults);
|
||||
if (NonLocalDepResults.size() == 1) {
|
||||
SourceDep = NonLocalDepResults[0].getResult();
|
||||
assert((!SourceDep.getInst() ||
|
||||
LookupDomTree().dominates(SourceDep.getInst(), M)) &&
|
||||
"when memdep returns exactly one result, it should dominate");
|
||||
}
|
||||
}
|
||||
|
||||
MemDepResult SourceDep =
|
||||
MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
|
||||
M->getIterator(), M->getParent());
|
||||
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
|
||||
return false;
|
||||
|
||||
@ -1259,18 +1235,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
|
||||
MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
|
||||
SrcLoc, true, M->getIterator(), M->getParent());
|
||||
|
||||
if (SrcDepInfo.isNonLocal()) {
|
||||
SmallVector<NonLocalDepResult, 2> NonLocalDepResults;
|
||||
MD->getNonLocalPointerDependencyFrom(M, SrcLoc, /*isLoad=*/true,
|
||||
NonLocalDepResults);
|
||||
if (NonLocalDepResults.size() == 1) {
|
||||
SrcDepInfo = NonLocalDepResults[0].getResult();
|
||||
assert((!SrcDepInfo.getInst() ||
|
||||
LookupDomTree().dominates(SrcDepInfo.getInst(), M)) &&
|
||||
"when memdep returns exactly one result, it should dominate");
|
||||
}
|
||||
}
|
||||
|
||||
if (SrcDepInfo.isClobber()) {
|
||||
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
|
||||
return processMemCpyMemCpyDependence(M, MDep);
|
||||
|
@ -2796,17 +2796,12 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
|
||||
StatepointLiveSetTy Updated;
|
||||
findLiveSetAtInst(Inst, RevisedLivenessData, Updated);
|
||||
|
||||
#ifndef NDEBUG
|
||||
DenseSet<Value *> Bases;
|
||||
for (auto KVPair : Info.PointerToBase)
|
||||
Bases.insert(KVPair.second);
|
||||
#endif
|
||||
|
||||
// We may have base pointers which are now live that weren't before. We need
|
||||
// to update the PointerToBase structure to reflect this.
|
||||
for (auto V : Updated)
|
||||
if (Info.PointerToBase.insert({V, V}).second) {
|
||||
assert(Bases.count(V) && "Can't find base for unexpected live value!");
|
||||
assert(isKnownBaseResult(V) &&
|
||||
"Can't find base for unexpected live value!");
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -648,8 +648,13 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
|
||||
SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
|
||||
NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa",
|
||||
DT, LI, PreserveLCSSA);
|
||||
// NewExit gets its DebugLoc from LatchExit, which is not part of the
|
||||
// original Loop.
|
||||
// Fix this by setting Loop's DebugLoc to NewExit.
|
||||
auto *NewExitTerminator = NewExit->getTerminator();
|
||||
NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
|
||||
// Split NewExit to insert epilog remainder loop.
|
||||
EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
|
||||
EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
|
||||
EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
|
||||
} else {
|
||||
// If prolog remainder
|
||||
|
@ -127,16 +127,6 @@ static cl::opt<unsigned> MaxSpeculationDepth(
|
||||
cl::desc("Limit maximum recursion depth when calculating costs of "
|
||||
"speculatively executed instructions"));
|
||||
|
||||
static cl::opt<unsigned> DependenceChainLatency(
|
||||
"dependence-chain-latency", cl::Hidden, cl::init(8),
|
||||
cl::desc("Limit the maximum latency of dependence chain containing cmp "
|
||||
"for if conversion"));
|
||||
|
||||
static cl::opt<unsigned> SmallBBSize(
|
||||
"small-bb-size", cl::Hidden, cl::init(40),
|
||||
cl::desc("Check dependence chain latency only in basic block smaller than "
|
||||
"this number"));
|
||||
|
||||
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
|
||||
STATISTIC(NumLinearMaps,
|
||||
"Number of switch instructions turned into linear mapping");
|
||||
@ -405,166 +395,6 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Estimate the code size of the specified BB.
|
||||
static unsigned CountBBCodeSize(BasicBlock *BB,
|
||||
const TargetTransformInfo &TTI) {
|
||||
unsigned Size = 0;
|
||||
for (auto II = BB->begin(); !isa<TerminatorInst>(II); ++II)
|
||||
Size += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_CodeSize);
|
||||
return Size;
|
||||
}
|
||||
|
||||
/// Find out the latency of the longest dependence chain in the BB if
|
||||
/// LongestChain is true, or the dependence chain containing the compare
|
||||
/// instruction feeding the block's conditional branch.
|
||||
static unsigned FindDependenceChainLatency(BasicBlock *BB,
|
||||
DenseMap<Instruction *, unsigned> &Instructions,
|
||||
const TargetTransformInfo &TTI,
|
||||
bool LongestChain) {
|
||||
unsigned MaxLatency = 0;
|
||||
|
||||
BasicBlock::iterator II;
|
||||
for (II = BB->begin(); !isa<TerminatorInst>(II); ++II) {
|
||||
unsigned Latency = 0;
|
||||
for (unsigned O = 0, E = II->getNumOperands(); O != E; ++O) {
|
||||
Instruction *Op = dyn_cast<Instruction>(II->getOperand(O));
|
||||
if (Op && Instructions.count(Op)) {
|
||||
auto OpLatency = Instructions[Op];
|
||||
if (OpLatency > Latency)
|
||||
Latency = OpLatency;
|
||||
}
|
||||
}
|
||||
Latency += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_Latency);
|
||||
Instructions[&(*II)] = Latency;
|
||||
|
||||
if (Latency > MaxLatency)
|
||||
MaxLatency = Latency;
|
||||
}
|
||||
|
||||
if (LongestChain)
|
||||
return MaxLatency;
|
||||
|
||||
// The length of the dependence chain containing the compare instruction is
|
||||
// wanted, so the terminator must be a BranchInst.
|
||||
assert(isa<BranchInst>(II));
|
||||
BranchInst* Br = cast<BranchInst>(II);
|
||||
Instruction *Cmp = dyn_cast<Instruction>(Br->getCondition());
|
||||
if (Cmp && Instructions.count(Cmp))
|
||||
return Instructions[Cmp];
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Instructions in BB2 may depend on instructions in BB1, and instructions
|
||||
/// in BB1 may have users in BB2. If the last (in terms of latency) such kind
|
||||
/// of instruction in BB1 is I, then the instructions after I can be executed
|
||||
/// in parallel with instructions in BB2.
|
||||
/// This function returns the latency of I.
|
||||
static unsigned LatencyAdjustment(BasicBlock *BB1, BasicBlock *BB2,
|
||||
BasicBlock *IfBlock1, BasicBlock *IfBlock2,
|
||||
DenseMap<Instruction *, unsigned> &BB1Instructions) {
|
||||
unsigned LastLatency = 0;
|
||||
SmallVector<Instruction *, 16> Worklist;
|
||||
BasicBlock::iterator II;
|
||||
for (II = BB2->begin(); !isa<TerminatorInst>(II); ++II) {
|
||||
if (PHINode *PN = dyn_cast<PHINode>(II)) {
|
||||
// Look for users in BB2.
|
||||
bool InBBUser = false;
|
||||
for (User *U : PN->users()) {
|
||||
if (cast<Instruction>(U)->getParent() == BB2) {
|
||||
InBBUser = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// No such user, we don't care about this instruction and its operands.
|
||||
if (!InBBUser)
|
||||
break;
|
||||
}
|
||||
Worklist.push_back(&(*II));
|
||||
}
|
||||
|
||||
while (!Worklist.empty()) {
|
||||
Instruction *I = Worklist.pop_back_val();
|
||||
for (unsigned O = 0, E = I->getNumOperands(); O != E; ++O) {
|
||||
if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(O))) {
|
||||
if (Op->getParent() == IfBlock1 || Op->getParent() == IfBlock2)
|
||||
Worklist.push_back(Op);
|
||||
else if (Op->getParent() == BB1 && BB1Instructions.count(Op)) {
|
||||
if (BB1Instructions[Op] > LastLatency)
|
||||
LastLatency = BB1Instructions[Op];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return LastLatency;
|
||||
}
|
||||
|
||||
/// If after if conversion, most of the instructions in this new BB construct a
|
||||
/// long and slow dependence chain, it may be slower than cmp/branch, even
|
||||
/// if the branch has a high miss rate, because the control dependence is
|
||||
/// transformed into data dependence, and control dependence can be speculated,
|
||||
/// and thus, the second part can execute in parallel with the first part on
|
||||
/// modern OOO processor.
|
||||
///
|
||||
/// To check this condition, this function finds the length of the dependence
|
||||
/// chain in BB1 (only the part that can be executed in parallel with code after
|
||||
/// branch in BB2) containing cmp, and if the length is longer than a threshold,
|
||||
/// don't perform if conversion.
|
||||
///
|
||||
/// BB1, BB2, IfBlock1 and IfBlock2 are candidate BBs for if conversion.
|
||||
/// SpeculationSize contains the code size of IfBlock1 and IfBlock2.
|
||||
static bool FindLongDependenceChain(BasicBlock *BB1, BasicBlock *BB2,
|
||||
BasicBlock *IfBlock1, BasicBlock *IfBlock2,
|
||||
unsigned SpeculationSize,
|
||||
const TargetTransformInfo &TTI) {
|
||||
// Accumulated latency of each instruction in their BBs.
|
||||
DenseMap<Instruction *, unsigned> BB1Instructions;
|
||||
DenseMap<Instruction *, unsigned> BB2Instructions;
|
||||
|
||||
if (!TTI.isOutOfOrder())
|
||||
return false;
|
||||
|
||||
unsigned NewBBSize = CountBBCodeSize(BB1, TTI) + CountBBCodeSize(BB2, TTI)
|
||||
+ SpeculationSize;
|
||||
|
||||
// We check small BB only since it is more difficult to find unrelated
|
||||
// instructions to fill functional units in a small BB.
|
||||
if (NewBBSize > SmallBBSize)
|
||||
return false;
|
||||
|
||||
auto BB1Chain =
|
||||
FindDependenceChainLatency(BB1, BB1Instructions, TTI, false);
|
||||
auto BB2Chain =
|
||||
FindDependenceChainLatency(BB2, BB2Instructions, TTI, true);
|
||||
|
||||
// If there are many unrelated instructions in the new BB, there will be
|
||||
// other instructions for the processor to issue regardless of the length
|
||||
// of this new dependence chain.
|
||||
// Modern processors can issue 3 or more instructions in each cycle. But in
|
||||
// real world applications, an IPC of 2 is already very good for non-loop
|
||||
// code with small basic blocks. Higher IPC is usually found in programs with
|
||||
// small kernel. So IPC of 2 is more reasonable for most applications.
|
||||
if ((BB1Chain + BB2Chain) * 2 <= NewBBSize)
|
||||
return false;
|
||||
|
||||
// We only care about part of the dependence chain in BB1 that can be
|
||||
// executed in parallel with BB2, so adjust the latency.
|
||||
BB1Chain -=
|
||||
LatencyAdjustment(BB1, BB2, IfBlock1, IfBlock2, BB1Instructions);
|
||||
|
||||
// Correctly predicted branch instruction can skip the dependence chain in
|
||||
// BB1, but misprediction has a penalty, so only when the dependence chain is
|
||||
// longer than DependenceChainLatency, then branch is better than select.
|
||||
// Besides misprediction penalty, the threshold value DependenceChainLatency
|
||||
// also depends on branch misprediction rate, taken branch latency and cmov
|
||||
// latency.
|
||||
if (BB1Chain >= DependenceChainLatency)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Extract ConstantInt from value, looking through IntToPtr
|
||||
/// and PointerNullValue. Return NULL if value is not a constant int.
|
||||
static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
|
||||
@ -2214,11 +2044,6 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
|
||||
if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
|
||||
return false;
|
||||
|
||||
// Don't do if conversion for long dependence chain.
|
||||
if (FindLongDependenceChain(BB, EndBB, ThenBB, nullptr,
|
||||
CountBBCodeSize(ThenBB, TTI), TTI))
|
||||
return false;
|
||||
|
||||
// If we get here, we can hoist the instruction and if-convert.
|
||||
DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
|
||||
|
||||
@ -2526,10 +2351,6 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
|
||||
}
|
||||
}
|
||||
|
||||
if (FindLongDependenceChain(DomBlock, BB, IfBlock1, IfBlock2,
|
||||
AggressiveInsts.size(), TTI))
|
||||
return false;
|
||||
|
||||
DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
|
||||
<< IfTrue->getName() << " F: " << IfFalse->getName() << "\n");
|
||||
|
||||
|
@ -1,9 +1,9 @@
|
||||
; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s
|
||||
; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK-DAG: DemandedBits: 0xFF for %1 = add nsw i32 %a, 5
|
||||
; CHECK-DAG: DemandedBits: 0xFF for %3 = trunc i32 %2 to i8
|
||||
; CHECK-DAG: DemandedBits: 0xFF for %2 = mul nsw i32 %1, %b
|
||||
; CHECK-DAG: DemandedBits: 0xff for %1 = add nsw i32 %a, 5
|
||||
; CHECK-DAG: DemandedBits: 0xff for %3 = trunc i32 %2 to i8
|
||||
; CHECK-DAG: DemandedBits: 0xff for %2 = mul nsw i32 %1, %b
|
||||
define i8 @test_mul(i32 %a, i32 %b) {
|
||||
%1 = add nsw i32 %a, 5
|
||||
%2 = mul nsw i32 %1, %b
|
||||
|
@ -1,9 +1,9 @@
|
||||
; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s
|
||||
; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK-DAG: DemandedBits: 0xFF000000 for %1 = or i32 %x, 1
|
||||
; CHECK-DAG: DemandedBits: 0xFF for %2 = call i32 @llvm.bitreverse.i32(i32 %1)
|
||||
; CHECK-DAG: DemandedBits: 0xFF for %3 = trunc i32 %2 to i8
|
||||
; CHECK-DAG: DemandedBits: 0xff000000 for %1 = or i32 %x, 1
|
||||
; CHECK-DAG: DemandedBits: 0xff for %2 = call i32 @llvm.bitreverse.i32(i32 %1)
|
||||
; CHECK-DAG: DemandedBits: 0xff for %3 = trunc i32 %2 to i8
|
||||
define i8 @test_bswap(i32 %x) {
|
||||
%1 = or i32 %x, 1
|
||||
%2 = call i32 @llvm.bswap.i32(i32 %1)
|
||||
@ -12,9 +12,9 @@ define i8 @test_bswap(i32 %x) {
|
||||
}
|
||||
declare i32 @llvm.bswap.i32(i32)
|
||||
|
||||
; CHECK-DAG: DemandedBits: 0xFF000000 for %1 = or i32 %x, 1
|
||||
; CHECK-DAG: DemandedBits: 0xFF for %2 = call i32 @llvm.bswap.i32(i32 %1)
|
||||
; CHECK-DAG: DemandedBits: 0xFF for %3 = trunc i32 %2 to i8
|
||||
; CHECK-DAG: DemandedBits: 0xff000000 for %1 = or i32 %x, 1
|
||||
; CHECK-DAG: DemandedBits: 0xff for %2 = call i32 @llvm.bswap.i32(i32 %1)
|
||||
; CHECK-DAG: DemandedBits: 0xff for %3 = trunc i32 %2 to i8
|
||||
define i8 @test_bitreverse(i32 %x) {
|
||||
%1 = or i32 %x, 1
|
||||
%2 = call i32 @llvm.bitreverse.i32(i32 %1)
|
||||
|
48
test/Analysis/Lint/noalias-byval.ll
Normal file
48
test/Analysis/Lint/noalias-byval.ll
Normal file
@ -0,0 +1,48 @@
|
||||
; RUN: opt < %s -lint -disable-output 2>&1 | FileCheck %s
|
||||
|
||||
%s = type { i8 }
|
||||
|
||||
; Function Attrs: argmemonly nounwind
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) #0
|
||||
|
||||
; Function Attrs: argmemonly nounwind
|
||||
declare void @llvm.memset.p0i8.i8.i32(i8* nocapture writeonly, i8, i32, i32, i1) #0
|
||||
|
||||
declare void @f1(%s* noalias nocapture sret, %s* nocapture readnone)
|
||||
|
||||
define void @f2() {
|
||||
entry:
|
||||
%c = alloca %s
|
||||
%tmp = alloca %s
|
||||
%0 = bitcast %s* %c to i8*
|
||||
%1 = bitcast %s* %tmp to i8*
|
||||
call void @llvm.memset.p0i8.i8.i32(i8* %0, i8 0, i32 1, i32 1, i1 false)
|
||||
call void @f1(%s* sret %c, %s* %c)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Lint should complain about us passing %c to both arguments since one of them
|
||||
; is noalias.
|
||||
; CHECK: Unusual: noalias argument aliases another argument
|
||||
; CHECK-NEXT: call void @f1(%s* sret %c, %s* %c)
|
||||
|
||||
declare void @f3(%s* noalias nocapture sret, %s* byval nocapture readnone)
|
||||
|
||||
define void @f4() {
|
||||
entry:
|
||||
%c = alloca %s
|
||||
%tmp = alloca %s
|
||||
%0 = bitcast %s* %c to i8*
|
||||
%1 = bitcast %s* %tmp to i8*
|
||||
call void @llvm.memset.p0i8.i8.i32(i8* %0, i8 0, i32 1, i32 1, i1 false)
|
||||
call void @f3(%s* sret %c, %s* byval %c)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Lint should not complain about passing %c to both arguments even if one is
|
||||
; noalias, since the other one is byval, effectively copying the data to the
|
||||
; stack instead of passing the pointer itself.
|
||||
; CHECK-NOT: Unusual: noalias argument aliases another argument
|
||||
; CHECK-NOT: call void @f3(%s* sret %c, %s* %c)
|
||||
|
||||
attributes #0 = { argmemonly nounwind }
|
72
test/Analysis/ScalarEvolution/truncate.ll
Normal file
72
test/Analysis/ScalarEvolution/truncate.ll
Normal file
@ -0,0 +1,72 @@
|
||||
; RUN: opt < %s -analyze -scalar-evolution
|
||||
; RUN: opt < %s -passes='print<scalar-evolution>'
|
||||
; Regression test for assert ScalarEvolution::getTruncateExpr.
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @snork(i8* %arg, i8 %arg1, i64 %arg2) {
|
||||
bb:
|
||||
br label %bb12
|
||||
|
||||
bb3: ; preds = %bb34
|
||||
br i1 true, label %bb4, label %bb12
|
||||
|
||||
bb4: ; preds = %bb3
|
||||
br label %bb6
|
||||
|
||||
bb5: ; preds = %bb6
|
||||
ret void
|
||||
|
||||
bb6: ; preds = %bb6, %bb4
|
||||
%tmp = phi i64 [ %tmp28, %bb4 ], [ %tmp10, %bb6 ]
|
||||
%tmp7 = phi i32 [ 3, %bb4 ], [ %tmp11, %bb6 ]
|
||||
%tmp8 = trunc i64 %tmp to i32
|
||||
%tmp9 = sdiv i32 %tmp8, %tmp7
|
||||
%tmp10 = add i64 %tmp, -1
|
||||
%tmp11 = add i32 %tmp9, %tmp7
|
||||
br i1 true, label %bb5, label %bb6
|
||||
|
||||
bb12: ; preds = %bb3, %bb
|
||||
br label %bb13
|
||||
|
||||
bb13: ; preds = %bb34, %bb12
|
||||
%tmp14 = phi i64 [ %arg2, %bb12 ], [ %tmp28, %bb34 ]
|
||||
%tmp15 = phi i8 [ %arg1, %bb12 ], [ %tmp26, %bb34 ]
|
||||
%tmp16 = phi i32 [ 1, %bb12 ], [ %tmp35, %bb34 ]
|
||||
%tmp17 = add i8 %tmp15, -1
|
||||
%tmp18 = sext i8 %tmp17 to i64
|
||||
%tmp19 = sub i64 1, %tmp14
|
||||
%tmp20 = add i64 %tmp19, %tmp18
|
||||
%tmp21 = trunc i64 %tmp20 to i32
|
||||
%tmp22 = icmp eq i32 %tmp21, 0
|
||||
br i1 %tmp22, label %bb32, label %bb23
|
||||
|
||||
bb23: ; preds = %bb13
|
||||
br i1 true, label %bb25, label %bb24
|
||||
|
||||
bb24: ; preds = %bb23
|
||||
br label %bb25
|
||||
|
||||
bb25: ; preds = %bb24, %bb23
|
||||
%tmp26 = add i8 %tmp15, -2
|
||||
%tmp27 = sext i8 %tmp26 to i64
|
||||
%tmp28 = sub i64 %tmp27, %tmp20
|
||||
%tmp29 = trunc i64 %tmp28 to i32
|
||||
%tmp30 = icmp eq i32 %tmp29, 0
|
||||
br i1 %tmp30, label %bb31, label %bb34
|
||||
|
||||
bb31: ; preds = %bb25
|
||||
br label %bb33
|
||||
|
||||
bb32: ; preds = %bb13
|
||||
br label %bb33
|
||||
|
||||
bb33: ; preds = %bb32, %bb31
|
||||
unreachable
|
||||
|
||||
bb34: ; preds = %bb25
|
||||
%tmp35 = add nuw nsw i32 %tmp16, 2
|
||||
%tmp36 = icmp ugt i32 %tmp16, 52
|
||||
br i1 %tmp36, label %bb3, label %bb13
|
||||
}
|
@ -1,7 +1,7 @@
|
||||
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math %s | FileCheck --check-prefix=UNPROFITABLE %s
|
||||
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s | FileCheck --check-prefix=PROFITABLE %s
|
||||
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynosm1 -enable-unsafe-fp-math %s | FileCheck --check-prefix=PROFITABLE %s
|
||||
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math %s | FileCheck --check-prefix=PROFITABLE %s
|
||||
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s
|
||||
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
|
||||
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynosm1 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
|
||||
# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
|
||||
#
|
||||
name: f1_2s
|
||||
registers:
|
||||
@ -80,3 +80,82 @@ body: |
|
||||
# PROFITABLE-LABEL: name: f1_2d
|
||||
# PROFITABLE: %5:fpr128 = FNEGv2f64 %2
|
||||
# PROFITABLE-NEXT: FMLAv2f64 killed %5, %0, %1
|
||||
---
|
||||
name: f1_both_fmul_2s
|
||||
registers:
|
||||
- { id: 0, class: fpr64 }
|
||||
- { id: 1, class: fpr64 }
|
||||
- { id: 2, class: fpr64 }
|
||||
- { id: 3, class: fpr64 }
|
||||
- { id: 4, class: fpr64 }
|
||||
- { id: 5, class: fpr64 }
|
||||
- { id: 6, class: fpr64 }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%3:fpr64 = COPY %q3
|
||||
%2:fpr64 = COPY %q2
|
||||
%1:fpr64 = COPY %q1
|
||||
%0:fpr64 = COPY %q0
|
||||
%4:fpr64 = FMULv2f32 %0, %1
|
||||
%5:fpr64 = FMULv2f32 %2, %3
|
||||
%6:fpr64 = FSUBv2f32 killed %4, %5
|
||||
%q0 = COPY %6
|
||||
RET_ReallyLR implicit %q0
|
||||
|
||||
...
|
||||
# ALL-LABEL: name: f1_both_fmul_2s
|
||||
# ALL: %4:fpr64 = FMULv2f32 %0, %1
|
||||
# ALL-NEXT: FMLSv2f32 killed %4, %2, %3
|
||||
---
|
||||
name: f1_both_fmul_4s
|
||||
registers:
|
||||
- { id: 0, class: fpr128 }
|
||||
- { id: 1, class: fpr128 }
|
||||
- { id: 2, class: fpr128 }
|
||||
- { id: 3, class: fpr128 }
|
||||
- { id: 4, class: fpr128 }
|
||||
- { id: 5, class: fpr128 }
|
||||
- { id: 6, class: fpr128 }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%3:fpr128 = COPY %q3
|
||||
%2:fpr128 = COPY %q2
|
||||
%1:fpr128 = COPY %q1
|
||||
%0:fpr128 = COPY %q0
|
||||
%4:fpr128 = FMULv4f32 %0, %1
|
||||
%5:fpr128 = FMULv4f32 %2, %3
|
||||
%6:fpr128 = FSUBv4f32 killed %4, %5
|
||||
%q0 = COPY %6
|
||||
RET_ReallyLR implicit %q0
|
||||
|
||||
...
|
||||
# ALL-LABEL: name: f1_both_fmul_4s
|
||||
# ALL: %4:fpr128 = FMULv4f32 %0, %1
|
||||
# ALL-NEXT: FMLSv4f32 killed %4, %2, %3
|
||||
---
|
||||
name: f1_both_fmul_2d
|
||||
registers:
|
||||
- { id: 0, class: fpr128 }
|
||||
- { id: 1, class: fpr128 }
|
||||
- { id: 2, class: fpr128 }
|
||||
- { id: 3, class: fpr128 }
|
||||
- { id: 4, class: fpr128 }
|
||||
- { id: 5, class: fpr128 }
|
||||
- { id: 6, class: fpr128 }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%3:fpr128 = COPY %q3
|
||||
%2:fpr128 = COPY %q2
|
||||
%1:fpr128 = COPY %q1
|
||||
%0:fpr128 = COPY %q0
|
||||
%4:fpr128 = FMULv2f64 %0, %1
|
||||
%5:fpr128 = FMULv2f64 %2, %3
|
||||
%6:fpr128 = FSUBv2f64 killed %4, %5
|
||||
%q0 = COPY %6
|
||||
RET_ReallyLR implicit %q0
|
||||
|
||||
...
|
||||
# ALL-LABEL: name: f1_both_fmul_2d
|
||||
# ALL: %4:fpr128 = FMULv2f64 %0, %1
|
||||
# ALL-NEXT: FMLSv2f64 killed %4, %2, %3
|
||||
|
||||
|
13
test/CodeGen/AArch64/combine-and-like.ll
Normal file
13
test/CodeGen/AArch64/combine-and-like.ll
Normal file
@ -0,0 +1,13 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
|
||||
|
||||
define i32 @f(i32 %a0) {
|
||||
; CHECK-LABEL: f:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w0, wzr
|
||||
; CHECK-NEXT: ret
|
||||
%1 = lshr i32 %a0, 2147483647
|
||||
%2 = add i32 %1, 2147483647
|
||||
%3 = and i32 %2, %1
|
||||
ret i32 %3
|
||||
}
|
@ -129,7 +129,7 @@ entry:
|
||||
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
|
||||
; GENERIC-LABEL: imulq512:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: imulq512:
|
||||
@ -143,7 +143,7 @@ define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
|
||||
define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
|
||||
; GENERIC-LABEL: imulq256:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: imulq256:
|
||||
@ -157,7 +157,7 @@ define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
|
||||
define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
|
||||
; GENERIC-LABEL: imulq128:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: imulq128:
|
||||
@ -550,7 +550,7 @@ define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
|
||||
define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
|
||||
; GENERIC-LABEL: vpmulld_test:
|
||||
; GENERIC: # %bb.0:
|
||||
; GENERIC-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
|
||||
; GENERIC-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
|
||||
; GENERIC-NEXT: retq # sched: [1:1.00]
|
||||
;
|
||||
; SKX-LABEL: vpmulld_test:
|
||||
|
@ -57,7 +57,7 @@
|
||||
# return w;
|
||||
# }
|
||||
#
|
||||
# CHECK: 129: eb 13 jmp 19 <ifElse+0x7E>
|
||||
# CHECK: 129: eb 13 jmp 19 <ifElse+0x7e>
|
||||
# CHECK: 12e: eb a0 jmp -96 <ifElse+0x10>
|
||||
# CHECK: 132: eb 9c jmp -100 <ifElse+0x10>
|
||||
# CHECK: 137: eb 97 jmp -105 <ifElse+0x10>
|
||||
|
@ -1,6 +1,9 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=AVX --check-prefix=AVX512VL
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=AVX --check-prefix=AVX512DQVL
|
||||
|
||||
; TODO - shuffle+sext are superfluous
|
||||
define <2 x i64> @combine_shuffle_sext_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
@ -66,13 +69,29 @@ define <2 x i64> @combine_shuffle_zero_pmuludq(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
; SSE-NEXT: pmuludq %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: combine_shuffle_zero_pmuludq:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
|
||||
; AVX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
|
||||
; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
; AVX2-LABEL: combine_shuffle_zero_pmuludq:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
|
||||
; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: combine_shuffle_zero_pmuludq:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
|
||||
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
|
||||
; AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512DQVL-LABEL: combine_shuffle_zero_pmuludq:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512DQVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
|
||||
; AVX512DQVL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
|
||||
; AVX512DQVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
|
||||
; AVX512DQVL-NEXT: retq
|
||||
%1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
%2 = shufflevector <4 x i32> %a1, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
||||
%3 = bitcast <4 x i32> %1 to <2 x i64>
|
||||
@ -94,13 +113,29 @@ define <4 x i64> @combine_shuffle_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1)
|
||||
; SSE-NEXT: pmuludq %xmm2, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: combine_shuffle_zero_pmuludq_256:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
|
||||
; AVX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
|
||||
; AVX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
|
||||
; AVX-NEXT: retq
|
||||
; AVX2-LABEL: combine_shuffle_zero_pmuludq_256:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
|
||||
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: combine_shuffle_zero_pmuludq_256:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
|
||||
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
|
||||
; AVX512VL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512DQVL-LABEL: combine_shuffle_zero_pmuludq_256:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||
; AVX512DQVL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7]
|
||||
; AVX512DQVL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
|
||||
; AVX512DQVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
|
||||
; AVX512DQVL-NEXT: retq
|
||||
%1 = shufflevector <8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
|
||||
%2 = shufflevector <8 x i32> %a1, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
|
||||
%3 = bitcast <8 x i32> %1 to <4 x i64>
|
||||
@ -108,3 +143,46 @@ define <4 x i64> @combine_shuffle_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1)
|
||||
%5 = mul <4 x i64> %3, %4
|
||||
ret <4 x i64> %5
|
||||
}
|
||||
|
||||
define <8 x i64> @combine_zext_pmuludq_256(<8 x i32> %a) {
|
||||
; SSE-LABEL: combine_zext_pmuludq_256:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
|
||||
; SSE-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
|
||||
; SSE-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero
|
||||
; SSE-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
|
||||
; SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
|
||||
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [715827883,715827883]
|
||||
; SSE-NEXT: pmuludq %xmm1, %xmm0
|
||||
; SSE-NEXT: pmuludq %xmm1, %xmm2
|
||||
; SSE-NEXT: pmuludq %xmm1, %xmm4
|
||||
; SSE-NEXT: pmuludq %xmm1, %xmm3
|
||||
; SSE-NEXT: movdqa %xmm4, %xmm1
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: combine_zext_pmuludq_256:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [715827883,715827883,715827883,715827883]
|
||||
; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpmuludq %ymm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: combine_zext_pmuludq_256:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
|
||||
; AVX512VL-NEXT: vpmuludq {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512DQVL-LABEL: combine_zext_pmuludq_256:
|
||||
; AVX512DQVL: # %bb.0:
|
||||
; AVX512DQVL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
|
||||
; AVX512DQVL-NEXT: vpmuludq {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; AVX512DQVL-NEXT: retq
|
||||
%1 = zext <8 x i32> %a to <8 x i64>
|
||||
%2 = mul nuw nsw <8 x i64> %1, <i64 715827883, i64 715827883, i64 715827883, i64 715827883, i64 715827883, i64 715827883, i64 715827883, i64 715827883>
|
||||
ret <8 x i64> %2
|
||||
}
|
||||
|
@ -95,6 +95,41 @@ define double @div3_arcp(double %x, double %y, double %z) {
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
define float @div_select_constant_fold(i1 zeroext %arg) {
|
||||
; CHECK-LABEL: div_select_constant_fold:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: jne .LBB6_1
|
||||
; CHECK-NEXT: # %bb.2:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: retq
|
||||
; CHECK-NEXT: .LBB6_1:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: retq
|
||||
%tmp = select i1 %arg, float 5.000000e+00, float 6.000000e+00
|
||||
%B2 = fdiv float %tmp, 1.000000e+00
|
||||
ret float %B2
|
||||
}
|
||||
|
||||
define float @div_select_constant_fold_zero(i1 zeroext %arg) {
|
||||
; CHECK-LABEL: div_select_constant_fold_zero:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: jne .LBB7_1
|
||||
; CHECK-NEXT: # %bb.2:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: jmp .LBB7_3
|
||||
; CHECK-NEXT: .LBB7_1:
|
||||
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: .LBB7_3:
|
||||
; CHECK-NEXT: xorps %xmm1, %xmm1
|
||||
; CHECK-NEXT: divss %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%tmp = select i1 %arg, float 5.000000e+00, float 6.000000e+00
|
||||
%B2 = fdiv float %tmp, 0.000000e+00
|
||||
ret float %B2
|
||||
}
|
||||
|
||||
define void @PR24141() {
|
||||
; CHECK-LABEL: PR24141:
|
||||
; CHECK: callq
|
||||
|
@ -1,3 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s --check-prefix=LIN
|
||||
; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefix=WIN
|
||||
; RUN: llc -mtriple=i686-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefix=LIN32
|
||||
@ -7,34 +8,59 @@
|
||||
; use an efficient mov/shift sequence rather than shuffling each individual
|
||||
; element out of the index vector.
|
||||
|
||||
; CHECK-LABEL: foo:
|
||||
; LIN: movdqa (%rsi), %xmm0
|
||||
; LIN: pand (%rdx), %xmm0
|
||||
; LIN: pextrq $1, %xmm0, %r[[REG4:.+]]
|
||||
; LIN: movq %xmm0, %r[[REG2:.+]]
|
||||
; LIN: movslq %e[[REG2]], %r[[REG1:.+]]
|
||||
; LIN: sarq $32, %r[[REG2]]
|
||||
; LIN: movslq %e[[REG4]], %r[[REG3:.+]]
|
||||
; LIN: sarq $32, %r[[REG4]]
|
||||
; LIN: movsd (%rdi,%r[[REG3]],8), %xmm1
|
||||
; LIN: movhpd (%rdi,%r[[REG4]],8), %xmm1
|
||||
; LIN: movq %rdi, %xmm1
|
||||
; LIN: movq %r[[REG3]], %xmm0
|
||||
|
||||
; WIN: movdqa (%rdx), %xmm0
|
||||
; WIN: pand (%r8), %xmm0
|
||||
; WIN: pextrq $1, %xmm0, %r[[REG4:.+]]
|
||||
; WIN: movq %xmm0, %r[[REG2:.+]]
|
||||
; WIN: movslq %e[[REG2]], %r[[REG1:.+]]
|
||||
; WIN: sarq $32, %r[[REG2]]
|
||||
; WIN: movslq %e[[REG4]], %r[[REG3:.+]]
|
||||
; WIN: sarq $32, %r[[REG4]]
|
||||
; WIN: movsd (%rcx,%r[[REG3]],8), %xmm1
|
||||
; WIN: movhpd (%rcx,%r[[REG4]],8), %xmm1
|
||||
; WIN: movdqa (%r[[REG2]]), %xmm0
|
||||
; WIN: movq %r[[REG2]], %xmm1
|
||||
|
||||
define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
|
||||
; LIN-LABEL: foo:
|
||||
; LIN: # %bb.0:
|
||||
; LIN-NEXT: movdqa (%rsi), %xmm0
|
||||
; LIN-NEXT: pand (%rdx), %xmm0
|
||||
; LIN-NEXT: pextrq $1, %xmm0, %rax
|
||||
; LIN-NEXT: movq %xmm0, %rcx
|
||||
; LIN-NEXT: movslq %ecx, %rdx
|
||||
; LIN-NEXT: sarq $32, %rcx
|
||||
; LIN-NEXT: movslq %eax, %rsi
|
||||
; LIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; LIN-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
|
||||
; LIN-NEXT: sarq $32, %rax
|
||||
; LIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; LIN-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
|
||||
; LIN-NEXT: retq
|
||||
;
|
||||
; WIN-LABEL: foo:
|
||||
; WIN: # %bb.0:
|
||||
; WIN-NEXT: movdqa (%rdx), %xmm0
|
||||
; WIN-NEXT: pand (%r8), %xmm0
|
||||
; WIN-NEXT: pextrq $1, %xmm0, %rax
|
||||
; WIN-NEXT: movq %xmm0, %rdx
|
||||
; WIN-NEXT: movslq %edx, %r8
|
||||
; WIN-NEXT: sarq $32, %rdx
|
||||
; WIN-NEXT: movslq %eax, %r9
|
||||
; WIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; WIN-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
|
||||
; WIN-NEXT: sarq $32, %rax
|
||||
; WIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; WIN-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
|
||||
; WIN-NEXT: retq
|
||||
;
|
||||
; LIN32-LABEL: foo:
|
||||
; LIN32: # %bb.0:
|
||||
; LIN32-NEXT: pushl %edi
|
||||
; LIN32-NEXT: pushl %esi
|
||||
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; LIN32-NEXT: movdqa (%edx), %xmm0
|
||||
; LIN32-NEXT: pand (%ecx), %xmm0
|
||||
; LIN32-NEXT: pextrd $1, %xmm0, %ecx
|
||||
; LIN32-NEXT: pextrd $2, %xmm0, %edx
|
||||
; LIN32-NEXT: pextrd $3, %xmm0, %esi
|
||||
; LIN32-NEXT: movd %xmm0, %edi
|
||||
; LIN32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; LIN32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
|
||||
; LIN32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; LIN32-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
|
||||
; LIN32-NEXT: popl %esi
|
||||
; LIN32-NEXT: popl %edi
|
||||
; LIN32-NEXT: retl
|
||||
%a = load <4 x i32>, <4 x i32>* %i
|
||||
%b = load <4 x i32>, <4 x i32>* %h
|
||||
%j = and <4 x i32> %a, %b
|
||||
@ -60,13 +86,81 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
|
||||
; Check that the sequence previously used above, which bounces the vector off the
|
||||
; cache works for x86-32. Note that in this case it will not be used for index
|
||||
; calculation, since indexes are 32-bit, not 64.
|
||||
; CHECK-LABEL: old:
|
||||
; LIN32: movaps %xmm0, (%esp)
|
||||
; LIN32-DAG: {{(mov|and)}}l (%esp),
|
||||
; LIN32-DAG: {{(mov|and)}}l 4(%esp),
|
||||
; LIN32-DAG: {{(mov|and)}}l 8(%esp),
|
||||
; LIN32-DAG: {{(mov|and)}}l 12(%esp),
|
||||
define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind {
|
||||
; LIN-LABEL: old:
|
||||
; LIN: # %bb.0:
|
||||
; LIN-NEXT: movdqa (%rsi), %xmm0
|
||||
; LIN-NEXT: pand (%rdx), %xmm0
|
||||
; LIN-NEXT: pextrq $1, %xmm0, %rax
|
||||
; LIN-NEXT: movq %rax, %rdx
|
||||
; LIN-NEXT: shrq $32, %rdx
|
||||
; LIN-NEXT: movq %xmm0, %rsi
|
||||
; LIN-NEXT: movq %rsi, %rdi
|
||||
; LIN-NEXT: shrq $32, %rdi
|
||||
; LIN-NEXT: andl %ecx, %esi
|
||||
; LIN-NEXT: andl %ecx, %eax
|
||||
; LIN-NEXT: andq %rcx, %rdi
|
||||
; LIN-NEXT: andq %rcx, %rdx
|
||||
; LIN-NEXT: movq %rdi, %xmm1
|
||||
; LIN-NEXT: movq %rsi, %xmm0
|
||||
; LIN-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; LIN-NEXT: movq %rdx, %xmm2
|
||||
; LIN-NEXT: movq %rax, %xmm1
|
||||
; LIN-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
||||
; LIN-NEXT: retq
|
||||
;
|
||||
; WIN-LABEL: old:
|
||||
; WIN: # %bb.0:
|
||||
; WIN-NEXT: movdqa (%rdx), %xmm0
|
||||
; WIN-NEXT: pand (%r8), %xmm0
|
||||
; WIN-NEXT: pextrq $1, %xmm0, %r8
|
||||
; WIN-NEXT: movq %r8, %rcx
|
||||
; WIN-NEXT: shrq $32, %rcx
|
||||
; WIN-NEXT: movq %xmm0, %rax
|
||||
; WIN-NEXT: movq %rax, %rdx
|
||||
; WIN-NEXT: shrq $32, %rdx
|
||||
; WIN-NEXT: andl %r9d, %eax
|
||||
; WIN-NEXT: andl %r9d, %r8d
|
||||
; WIN-NEXT: andq %r9, %rdx
|
||||
; WIN-NEXT: andq %r9, %rcx
|
||||
; WIN-NEXT: movq %rdx, %xmm1
|
||||
; WIN-NEXT: movq %rax, %xmm0
|
||||
; WIN-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; WIN-NEXT: movq %rcx, %xmm2
|
||||
; WIN-NEXT: movq %r8, %xmm1
|
||||
; WIN-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
||||
; WIN-NEXT: retq
|
||||
;
|
||||
; LIN32-LABEL: old:
|
||||
; LIN32: # %bb.0:
|
||||
; LIN32-NEXT: pushl %ebp
|
||||
; LIN32-NEXT: movl %esp, %ebp
|
||||
; LIN32-NEXT: pushl %esi
|
||||
; LIN32-NEXT: andl $-16, %esp
|
||||
; LIN32-NEXT: subl $32, %esp
|
||||
; LIN32-NEXT: movl 20(%ebp), %eax
|
||||
; LIN32-NEXT: movl 16(%ebp), %ecx
|
||||
; LIN32-NEXT: movl 12(%ebp), %edx
|
||||
; LIN32-NEXT: movaps (%edx), %xmm0
|
||||
; LIN32-NEXT: andps (%ecx), %xmm0
|
||||
; LIN32-NEXT: movaps %xmm0, (%esp)
|
||||
; LIN32-NEXT: movl (%esp), %ecx
|
||||
; LIN32-NEXT: andl %eax, %ecx
|
||||
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; LIN32-NEXT: andl %eax, %edx
|
||||
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; LIN32-NEXT: andl %eax, %esi
|
||||
; LIN32-NEXT: andl {{[0-9]+}}(%esp), %eax
|
||||
; LIN32-NEXT: movd %edx, %xmm1
|
||||
; LIN32-NEXT: movd %ecx, %xmm0
|
||||
; LIN32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; LIN32-NEXT: movd %eax, %xmm2
|
||||
; LIN32-NEXT: movd %esi, %xmm1
|
||||
; LIN32-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
||||
; LIN32-NEXT: leal -4(%ebp), %esp
|
||||
; LIN32-NEXT: popl %esi
|
||||
; LIN32-NEXT: popl %ebp
|
||||
; LIN32-NEXT: retl
|
||||
%a = load <4 x i32>, <4 x i32>* %i
|
||||
%b = load <4 x i32>, <4 x i32>* %h
|
||||
%j = and <4 x i32> %a, %b
|
||||
@ -77,7 +171,7 @@ define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind
|
||||
%q0 = zext i32 %d0 to i64
|
||||
%q1 = zext i32 %d1 to i64
|
||||
%q2 = zext i32 %d2 to i64
|
||||
%q3 = zext i32 %d3 to i64
|
||||
%q3 = zext i32 %d3 to i64
|
||||
%r0 = and i64 %q0, %f
|
||||
%r1 = and i64 %q1, %f
|
||||
%r2 = and i64 %q2, %f
|
||||
|
@ -497,7 +497,7 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
|
||||
; SKX_SMALL-NEXT: vpbroadcastq %rdi, %zmm2
|
||||
; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; SKX_SMALL-NEXT: vpmovsxdq %ymm1, %zmm1
|
||||
; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
|
||||
; SKX_SMALL-NEXT: vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1
|
||||
; SKX_SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm0
|
||||
; SKX_SMALL-NEXT: vpaddq %zmm0, %zmm2, %zmm0
|
||||
; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
|
||||
@ -510,7 +510,7 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
|
||||
; SKX_LARGE-NEXT: vpbroadcastq %rdi, %zmm2
|
||||
; SKX_LARGE-NEXT: vpmovsxdq %ymm1, %zmm1
|
||||
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
|
||||
; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm1, %zmm1
|
||||
; SKX_LARGE-NEXT: vpmuldq (%rax){1to8}, %zmm1, %zmm1
|
||||
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
|
||||
; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0
|
||||
; SKX_LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm0
|
||||
@ -582,7 +582,7 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
|
||||
; SKX_SMALL-NEXT: vpbroadcastq %rdi, %zmm2
|
||||
; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; SKX_SMALL-NEXT: vpmovsxdq %ymm1, %zmm1
|
||||
; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
|
||||
; SKX_SMALL-NEXT: vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1
|
||||
; SKX_SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm0
|
||||
; SKX_SMALL-NEXT: vpaddq %zmm0, %zmm2, %zmm0
|
||||
; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
|
||||
@ -595,7 +595,7 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
|
||||
; SKX_LARGE-NEXT: vpbroadcastq %rdi, %zmm2
|
||||
; SKX_LARGE-NEXT: vpmovsxdq %ymm1, %zmm1
|
||||
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
|
||||
; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm1, %zmm1
|
||||
; SKX_LARGE-NEXT: vpmuldq (%rax){1to8}, %zmm1, %zmm1
|
||||
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
|
||||
; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0
|
||||
; SKX_LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm0
|
||||
|
@ -183,3 +183,27 @@ define i32 @test_gt_2(<4 x i32> %A, <4 x i32> %B) {
|
||||
ret i32 %t1
|
||||
}
|
||||
|
||||
; (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
|
||||
; Don't combine with i1 - out of range constant
|
||||
define void @test_i1_uge(i1 *%A2) {
|
||||
; CHECK-LABEL: test_i1_uge:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movb (%rdi), %al
|
||||
; CHECK-NEXT: movl %eax, %ecx
|
||||
; CHECK-NEXT: xorb $1, %cl
|
||||
; CHECK-NEXT: andb %cl, %al
|
||||
; CHECK-NEXT: movzbl %al, %eax
|
||||
; CHECK-NEXT: andl $1, %eax
|
||||
; CHECK-NEXT: negq %rax
|
||||
; CHECK-NEXT: andb $1, %cl
|
||||
; CHECK-NEXT: movb %cl, (%rdi,%rax)
|
||||
; CHECK-NEXT: retq
|
||||
%L5 = load i1, i1* %A2
|
||||
%C3 = icmp ne i1 %L5, true
|
||||
%C8 = icmp eq i1 %L5, false
|
||||
%C9 = icmp ugt i1 %C3, %C8
|
||||
%G3 = getelementptr i1, i1* %A2, i1 %C9
|
||||
store i1 %C3, i1* %G3
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -112,13 +112,14 @@ define void @mul_4xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
|
||||
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-SSE-NEXT: movl c, %esi
|
||||
; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; X86-SSE-NEXT: pxor %xmm2, %xmm2
|
||||
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
|
||||
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
|
||||
; X86-SSE-NEXT: pmullw %xmm0, %xmm1
|
||||
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; X86-SSE-NEXT: movdqu %xmm1, (%esi,%ecx,4)
|
||||
; X86-SSE-NEXT: pxor %xmm1, %xmm1
|
||||
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
||||
; X86-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
|
||||
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
|
||||
; X86-SSE-NEXT: pmaddwd %xmm0, %xmm2
|
||||
; X86-SSE-NEXT: movdqu %xmm2, (%esi,%ecx,4)
|
||||
; X86-SSE-NEXT: popl %esi
|
||||
; X86-SSE-NEXT: retl
|
||||
;
|
||||
@ -142,13 +143,14 @@ define void @mul_4xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
|
||||
; X64-SSE: # %bb.0: # %entry
|
||||
; X64-SSE-NEXT: movq {{.*}}(%rip), %rax
|
||||
; X64-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X64-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; X64-SSE-NEXT: pxor %xmm2, %xmm2
|
||||
; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
|
||||
; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
|
||||
; X64-SSE-NEXT: pmullw %xmm0, %xmm1
|
||||
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
|
||||
; X64-SSE-NEXT: movdqu %xmm1, (%rax,%rdx,4)
|
||||
; X64-SSE-NEXT: pxor %xmm1, %xmm1
|
||||
; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
|
||||
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
|
||||
; X64-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
|
||||
; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
|
||||
; X64-SSE-NEXT: pmaddwd %xmm0, %xmm2
|
||||
; X64-SSE-NEXT: movdqu %xmm2, (%rax,%rdx,4)
|
||||
; X64-SSE-NEXT: retq
|
||||
;
|
||||
; X64-AVX-LABEL: mul_4xi8:
|
||||
@ -2215,13 +2217,7 @@ define void @PR34947() {
|
||||
; X86-SSE-NEXT: xorl %edx, %edx
|
||||
; X86-SSE-NEXT: divl (%eax)
|
||||
; X86-SSE-NEXT: movd %edx, %xmm0
|
||||
; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
|
||||
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
|
||||
; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
|
||||
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
|
||||
; X86-SSE-NEXT: pmuludq %xmm2, %xmm3
|
||||
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
|
||||
; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; X86-SSE-NEXT: pmaddwd {{\.LCPI.*}}, %xmm1
|
||||
; X86-SSE-NEXT: movl $8199, %eax # imm = 0x2007
|
||||
; X86-SSE-NEXT: movd %eax, %xmm2
|
||||
; X86-SSE-NEXT: pmuludq %xmm0, %xmm2
|
||||
@ -2415,13 +2411,7 @@ define void @PR34947() {
|
||||
; X64-SSE-NEXT: xorl %edx, %edx
|
||||
; X64-SSE-NEXT: divl (%rax)
|
||||
; X64-SSE-NEXT: movd %edx, %xmm0
|
||||
; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
|
||||
; X64-SSE-NEXT: pmuludq %xmm2, %xmm1
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
|
||||
; X64-SSE-NEXT: pmuludq %xmm2, %xmm3
|
||||
; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
|
||||
; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
|
||||
; X64-SSE-NEXT: pmaddwd {{.*}}(%rip), %xmm1
|
||||
; X64-SSE-NEXT: movl $8199, %eax # imm = 0x2007
|
||||
; X64-SSE-NEXT: movd %eax, %xmm2
|
||||
; X64-SSE-NEXT: pmuludq %xmm0, %xmm2
|
||||
|
@ -10,22 +10,14 @@
|
||||
define <4 x i32> @foo(<4 x i8> %A) {
|
||||
; CHECK32-LABEL: foo:
|
||||
; CHECK32: # %bb.0:
|
||||
; CHECK32-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,xmm0[4],zero,xmm0[8],zero,xmm0[12],zero,xmm0[u,u,u,u,u,u,u,u]
|
||||
; CHECK32-NEXT: movdqa {{.*#+}} xmm1 = <18778,18778,18778,18778,u,u,u,u>
|
||||
; CHECK32-NEXT: movdqa %xmm0, %xmm2
|
||||
; CHECK32-NEXT: pmullw %xmm1, %xmm0
|
||||
; CHECK32-NEXT: pmulhw %xmm1, %xmm2
|
||||
; CHECK32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
|
||||
; CHECK32-NEXT: pand {{\.LCPI.*}}, %xmm0
|
||||
; CHECK32-NEXT: pmaddwd {{\.LCPI.*}}, %xmm0
|
||||
; CHECK32-NEXT: retl
|
||||
;
|
||||
; CHECK64-LABEL: foo:
|
||||
; CHECK64: # %bb.0:
|
||||
; CHECK64-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,xmm0[4],zero,xmm0[8],zero,xmm0[12],zero,xmm0[u,u,u,u,u,u,u,u]
|
||||
; CHECK64-NEXT: movdqa {{.*#+}} xmm1 = <18778,18778,18778,18778,u,u,u,u>
|
||||
; CHECK64-NEXT: movdqa %xmm0, %xmm2
|
||||
; CHECK64-NEXT: pmullw %xmm1, %xmm0
|
||||
; CHECK64-NEXT: pmulhw %xmm1, %xmm2
|
||||
; CHECK64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
|
||||
; CHECK64-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; CHECK64-NEXT: pmaddwd {{.*}}(%rip), %xmm0
|
||||
; CHECK64-NEXT: retq
|
||||
;
|
||||
; SSE4-32-LABEL: foo:
|
||||
|
@ -5624,16 +5624,8 @@ define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
|
||||
;
|
||||
; ATOM-LABEL: test_pmaddwd:
|
||||
; ATOM: # %bb.0:
|
||||
; ATOM-NEXT: pmaddwd %xmm1, %xmm0
|
||||
; ATOM-NEXT: pmaddwd (%rdi), %xmm0
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:5.00]
|
||||
; ATOM-NEXT: pmaddwd (%rdi), %xmm0 # sched: [5:5.00]
|
||||
; ATOM-NEXT: retq # sched: [79:39.50]
|
||||
;
|
||||
; SLM-LABEL: test_pmaddwd:
|
||||
@ -6241,16 +6233,8 @@ define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
|
||||
;
|
||||
; ATOM-LABEL: test_pmuludq:
|
||||
; ATOM: # %bb.0:
|
||||
; ATOM-NEXT: pmuludq %xmm1, %xmm0
|
||||
; ATOM-NEXT: pmuludq (%rdi), %xmm0
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:5.00]
|
||||
; ATOM-NEXT: pmuludq (%rdi), %xmm0 # sched: [5:5.00]
|
||||
; ATOM-NEXT: retq # sched: [79:39.50]
|
||||
;
|
||||
; SLM-LABEL: test_pmuludq:
|
||||
@ -6394,12 +6378,8 @@ define <2 x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
|
||||
;
|
||||
; ATOM-LABEL: test_psadbw:
|
||||
; ATOM: # %bb.0:
|
||||
; ATOM-NEXT: psadbw %xmm1, %xmm0
|
||||
; ATOM-NEXT: psadbw (%rdi), %xmm0
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: psadbw %xmm1, %xmm0 # sched: [1:0.50]
|
||||
; ATOM-NEXT: psadbw (%rdi), %xmm0 # sched: [1:1.00]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
; ATOM-NEXT: nop # sched: [1:0.50]
|
||||
|
@ -7,8 +7,8 @@
|
||||
@ RUN: | FileCheck %s -check-prefix CHECK-THUMB
|
||||
|
||||
b.w .Lbranch
|
||||
@ CHECK-ARM: b #4 <$a.0+0xC>
|
||||
@ CHECK-THUMB: b.w #8 <$t.0+0xC>
|
||||
@ CHECK-ARM: b #4 <$a.0+0xc>
|
||||
@ CHECK-THUMB: b.w #8 <$t.0+0xc>
|
||||
adds r0, r1, #42
|
||||
adds r1, r2, #42
|
||||
.Lbranch:
|
||||
|
14
test/MC/ELF/comdat-declaration-errors.s
Normal file
14
test/MC/ELF/comdat-declaration-errors.s
Normal file
@ -0,0 +1,14 @@
|
||||
// RUN: not llvm-mc -triple x86_64-pc-linux-gnu %s \
|
||||
// RUN: -filetype=obj -o %t.o 2>&1 | FileCheck %s
|
||||
|
||||
// Check that we error out on incorrect COMDAT declarations
|
||||
// and not just silently ignore them.
|
||||
|
||||
// CHECK: error: invalid group name
|
||||
// CHECK-NEXT: .section .foo,"G",@progbits,-abc,comdat
|
||||
|
||||
// CHECK: error: invalid linkage
|
||||
// CHECK-NEXT: .section .bar,"G",@progbits,abc,-comdat
|
||||
|
||||
.section .foo,"G",@progbits,-abc,comdat
|
||||
.section .bar,"G",@progbits,abc,-comdat
|
169
test/MC/X86/PREFETCH-32.s
Normal file
169
test/MC/X86/PREFETCH-32.s
Normal file
@ -0,0 +1,169 @@
|
||||
// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
|
||||
|
||||
// CHECK: prefetch -485498096(%edx,%eax,4)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x84,0x82,0x10,0xe3,0x0f,0xe3]
|
||||
prefetch -485498096(%edx,%eax,4)
|
||||
|
||||
// CHECK: prefetch 485498096(%edx,%eax,4)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x84,0x82,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetch 485498096(%edx,%eax,4)
|
||||
|
||||
// CHECK: prefetch 485498096(%edx)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x82,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetch 485498096(%edx)
|
||||
|
||||
// CHECK: prefetch 485498096
|
||||
// CHECK: encoding: [0x0f,0x0d,0x05,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetch 485498096
|
||||
|
||||
// CHECK: prefetch 64(%edx,%eax)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x44,0x02,0x40]
|
||||
prefetch 64(%edx,%eax)
|
||||
|
||||
// CHECK: prefetch (%edx)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x02]
|
||||
prefetch (%edx)
|
||||
|
||||
// CHECK: prefetchnta -485498096(%edx,%eax,4)
|
||||
// CHECK: encoding: [0x0f,0x18,0x84,0x82,0x10,0xe3,0x0f,0xe3]
|
||||
prefetchnta -485498096(%edx,%eax,4)
|
||||
|
||||
// CHECK: prefetchnta 485498096(%edx,%eax,4)
|
||||
// CHECK: encoding: [0x0f,0x18,0x84,0x82,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetchnta 485498096(%edx,%eax,4)
|
||||
|
||||
// CHECK: prefetchnta 485498096(%edx)
|
||||
// CHECK: encoding: [0x0f,0x18,0x82,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetchnta 485498096(%edx)
|
||||
|
||||
// CHECK: prefetchnta 485498096
|
||||
// CHECK: encoding: [0x0f,0x18,0x05,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetchnta 485498096
|
||||
|
||||
// CHECK: prefetchnta 64(%edx,%eax)
|
||||
// CHECK: encoding: [0x0f,0x18,0x44,0x02,0x40]
|
||||
prefetchnta 64(%edx,%eax)
|
||||
|
||||
// CHECK: prefetchnta (%edx)
|
||||
// CHECK: encoding: [0x0f,0x18,0x02]
|
||||
prefetchnta (%edx)
|
||||
|
||||
// CHECK: prefetcht0 -485498096(%edx,%eax,4)
|
||||
// CHECK: encoding: [0x0f,0x18,0x8c,0x82,0x10,0xe3,0x0f,0xe3]
|
||||
prefetcht0 -485498096(%edx,%eax,4)
|
||||
|
||||
// CHECK: prefetcht0 485498096(%edx,%eax,4)
|
||||
// CHECK: encoding: [0x0f,0x18,0x8c,0x82,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetcht0 485498096(%edx,%eax,4)
|
||||
|
||||
// CHECK: prefetcht0 485498096(%edx)
|
||||
// CHECK: encoding: [0x0f,0x18,0x8a,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetcht0 485498096(%edx)
|
||||
|
||||
// CHECK: prefetcht0 485498096
|
||||
// CHECK: encoding: [0x0f,0x18,0x0d,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetcht0 485498096
|
||||
|
||||
// CHECK: prefetcht0 64(%edx,%eax)
|
||||
// CHECK: encoding: [0x0f,0x18,0x4c,0x02,0x40]
|
||||
prefetcht0 64(%edx,%eax)
|
||||
|
||||
// CHECK: prefetcht0 (%edx)
|
||||
// CHECK: encoding: [0x0f,0x18,0x0a]
|
||||
prefetcht0 (%edx)
|
||||
|
||||
// CHECK: prefetcht1 -485498096(%edx,%eax,4)
|
||||
// CHECK: encoding: [0x0f,0x18,0x94,0x82,0x10,0xe3,0x0f,0xe3]
|
||||
prefetcht1 -485498096(%edx,%eax,4)
|
||||
|
||||
// CHECK: prefetcht1 485498096(%edx,%eax,4)
|
||||
// CHECK: encoding: [0x0f,0x18,0x94,0x82,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetcht1 485498096(%edx,%eax,4)
|
||||
|
||||
// CHECK: prefetcht1 485498096(%edx)
|
||||
// CHECK: encoding: [0x0f,0x18,0x92,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetcht1 485498096(%edx)
|
||||
|
||||
// CHECK: prefetcht1 485498096
|
||||
// CHECK: encoding: [0x0f,0x18,0x15,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetcht1 485498096
|
||||
|
||||
// CHECK: prefetcht1 64(%edx,%eax)
|
||||
// CHECK: encoding: [0x0f,0x18,0x54,0x02,0x40]
|
||||
prefetcht1 64(%edx,%eax)
|
||||
|
||||
// CHECK: prefetcht1 (%edx)
|
||||
// CHECK: encoding: [0x0f,0x18,0x12]
|
||||
prefetcht1 (%edx)
|
||||
|
||||
// CHECK: prefetcht2 -485498096(%edx,%eax,4)
|
||||
// CHECK: encoding: [0x0f,0x18,0x9c,0x82,0x10,0xe3,0x0f,0xe3]
|
||||
prefetcht2 -485498096(%edx,%eax,4)
|
||||
|
||||
// CHECK: prefetcht2 485498096(%edx,%eax,4)
|
||||
// CHECK: encoding: [0x0f,0x18,0x9c,0x82,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetcht2 485498096(%edx,%eax,4)
|
||||
|
||||
// CHECK: prefetcht2 485498096(%edx)
|
||||
// CHECK: encoding: [0x0f,0x18,0x9a,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetcht2 485498096(%edx)
|
||||
|
||||
// CHECK: prefetcht2 485498096
|
||||
// CHECK: encoding: [0x0f,0x18,0x1d,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetcht2 485498096
|
||||
|
||||
// CHECK: prefetcht2 64(%edx,%eax)
|
||||
// CHECK: encoding: [0x0f,0x18,0x5c,0x02,0x40]
|
||||
prefetcht2 64(%edx,%eax)
|
||||
|
||||
// CHECK: prefetcht2 (%edx)
|
||||
// CHECK: encoding: [0x0f,0x18,0x1a]
|
||||
prefetcht2 (%edx)
|
||||
|
||||
// CHECK: prefetchw -485498096(%edx,%eax,4)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x8c,0x82,0x10,0xe3,0x0f,0xe3]
|
||||
prefetchw -485498096(%edx,%eax,4)
|
||||
|
||||
// CHECK: prefetchw 485498096(%edx,%eax,4)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x8c,0x82,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetchw 485498096(%edx,%eax,4)
|
||||
|
||||
// CHECK: prefetchw 485498096(%edx)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x8a,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetchw 485498096(%edx)
|
||||
|
||||
// CHECK: prefetchw 485498096
|
||||
// CHECK: encoding: [0x0f,0x0d,0x0d,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetchw 485498096
|
||||
|
||||
// CHECK: prefetchw 64(%edx,%eax)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x4c,0x02,0x40]
|
||||
prefetchw 64(%edx,%eax)
|
||||
|
||||
// CHECK: prefetchw (%edx)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x0a]
|
||||
prefetchw (%edx)
|
||||
|
||||
// CHECK: prefetchwt1 -485498096(%edx,%eax,4)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x94,0x82,0x10,0xe3,0x0f,0xe3]
|
||||
prefetchwt1 -485498096(%edx,%eax,4)
|
||||
|
||||
// CHECK: prefetchwt1 485498096(%edx,%eax,4)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x94,0x82,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetchwt1 485498096(%edx,%eax,4)
|
||||
|
||||
// CHECK: prefetchwt1 485498096(%edx)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x92,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetchwt1 485498096(%edx)
|
||||
|
||||
// CHECK: prefetchwt1 485498096
|
||||
// CHECK: encoding: [0x0f,0x0d,0x15,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetchwt1 485498096
|
||||
|
||||
// CHECK: prefetchwt1 64(%edx,%eax)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x54,0x02,0x40]
|
||||
prefetchwt1 64(%edx,%eax)
|
||||
|
||||
// CHECK: prefetchwt1 (%edx)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x12]
|
||||
prefetchwt1 (%edx)
|
170
test/MC/X86/PREFETCH-64.s
Normal file
170
test/MC/X86/PREFETCH-64.s
Normal file
@ -0,0 +1,170 @@
|
||||
// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
|
||||
|
||||
// CHECK: prefetch 485498096
|
||||
// CHECK: encoding: [0x0f,0x0d,0x04,0x25,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetch 485498096
|
||||
|
||||
// CHECK: prefetch 64(%rdx)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x42,0x40]
|
||||
prefetch 64(%rdx)
|
||||
|
||||
// CHECK: prefetch 64(%rdx,%rax,4)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x44,0x82,0x40]
|
||||
prefetch 64(%rdx,%rax,4)
|
||||
|
||||
// CHECK: prefetch -64(%rdx,%rax,4)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x44,0x82,0xc0]
|
||||
prefetch -64(%rdx,%rax,4)
|
||||
|
||||
// CHECK: prefetch 64(%rdx,%rax)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x44,0x02,0x40]
|
||||
prefetch 64(%rdx,%rax)
|
||||
|
||||
// CHECK: prefetchnta 485498096
|
||||
// CHECK: encoding: [0x0f,0x18,0x04,0x25,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetchnta 485498096
|
||||
|
||||
// CHECK: prefetchnta 64(%rdx)
|
||||
// CHECK: encoding: [0x0f,0x18,0x42,0x40]
|
||||
prefetchnta 64(%rdx)
|
||||
|
||||
// CHECK: prefetchnta 64(%rdx,%rax,4)
|
||||
// CHECK: encoding: [0x0f,0x18,0x44,0x82,0x40]
|
||||
prefetchnta 64(%rdx,%rax,4)
|
||||
|
||||
// CHECK: prefetchnta -64(%rdx,%rax,4)
|
||||
// CHECK: encoding: [0x0f,0x18,0x44,0x82,0xc0]
|
||||
prefetchnta -64(%rdx,%rax,4)
|
||||
|
||||
// CHECK: prefetchnta 64(%rdx,%rax)
|
||||
// CHECK: encoding: [0x0f,0x18,0x44,0x02,0x40]
|
||||
prefetchnta 64(%rdx,%rax)
|
||||
|
||||
// CHECK: prefetchnta (%rdx)
|
||||
// CHECK: encoding: [0x0f,0x18,0x02]
|
||||
prefetchnta (%rdx)
|
||||
|
||||
// CHECK: prefetch (%rdx)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x02]
|
||||
prefetch (%rdx)
|
||||
|
||||
// CHECK: prefetcht0 485498096
|
||||
// CHECK: encoding: [0x0f,0x18,0x0c,0x25,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetcht0 485498096
|
||||
|
||||
// CHECK: prefetcht0 64(%rdx)
|
||||
// CHECK: encoding: [0x0f,0x18,0x4a,0x40]
|
||||
prefetcht0 64(%rdx)
|
||||
|
||||
// CHECK: prefetcht0 64(%rdx,%rax,4)
|
||||
// CHECK: encoding: [0x0f,0x18,0x4c,0x82,0x40]
|
||||
prefetcht0 64(%rdx,%rax,4)
|
||||
|
||||
// CHECK: prefetcht0 -64(%rdx,%rax,4)
|
||||
// CHECK: encoding: [0x0f,0x18,0x4c,0x82,0xc0]
|
||||
prefetcht0 -64(%rdx,%rax,4)
|
||||
|
||||
// CHECK: prefetcht0 64(%rdx,%rax)
|
||||
// CHECK: encoding: [0x0f,0x18,0x4c,0x02,0x40]
|
||||
prefetcht0 64(%rdx,%rax)
|
||||
|
||||
// CHECK: prefetcht0 (%rdx)
|
||||
// CHECK: encoding: [0x0f,0x18,0x0a]
|
||||
prefetcht0 (%rdx)
|
||||
|
||||
// CHECK: prefetcht1 485498096
|
||||
// CHECK: encoding: [0x0f,0x18,0x14,0x25,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetcht1 485498096
|
||||
|
||||
// CHECK: prefetcht1 64(%rdx)
|
||||
// CHECK: encoding: [0x0f,0x18,0x52,0x40]
|
||||
prefetcht1 64(%rdx)
|
||||
|
||||
// CHECK: prefetcht1 64(%rdx,%rax,4)
|
||||
// CHECK: encoding: [0x0f,0x18,0x54,0x82,0x40]
|
||||
prefetcht1 64(%rdx,%rax,4)
|
||||
|
||||
// CHECK: prefetcht1 -64(%rdx,%rax,4)
|
||||
// CHECK: encoding: [0x0f,0x18,0x54,0x82,0xc0]
|
||||
prefetcht1 -64(%rdx,%rax,4)
|
||||
|
||||
// CHECK: prefetcht1 64(%rdx,%rax)
|
||||
// CHECK: encoding: [0x0f,0x18,0x54,0x02,0x40]
|
||||
prefetcht1 64(%rdx,%rax)
|
||||
|
||||
// CHECK: prefetcht1 (%rdx)
|
||||
// CHECK: encoding: [0x0f,0x18,0x12]
|
||||
prefetcht1 (%rdx)
|
||||
|
||||
// CHECK: prefetcht2 485498096
|
||||
// CHECK: encoding: [0x0f,0x18,0x1c,0x25,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetcht2 485498096
|
||||
|
||||
// CHECK: prefetcht2 64(%rdx)
|
||||
// CHECK: encoding: [0x0f,0x18,0x5a,0x40]
|
||||
prefetcht2 64(%rdx)
|
||||
|
||||
// CHECK: prefetcht2 64(%rdx,%rax,4)
|
||||
// CHECK: encoding: [0x0f,0x18,0x5c,0x82,0x40]
|
||||
prefetcht2 64(%rdx,%rax,4)
|
||||
|
||||
// CHECK: prefetcht2 -64(%rdx,%rax,4)
|
||||
// CHECK: encoding: [0x0f,0x18,0x5c,0x82,0xc0]
|
||||
prefetcht2 -64(%rdx,%rax,4)
|
||||
|
||||
// CHECK: prefetcht2 64(%rdx,%rax)
|
||||
// CHECK: encoding: [0x0f,0x18,0x5c,0x02,0x40]
|
||||
prefetcht2 64(%rdx,%rax)
|
||||
|
||||
// CHECK: prefetcht2 (%rdx)
|
||||
// CHECK: encoding: [0x0f,0x18,0x1a]
|
||||
prefetcht2 (%rdx)
|
||||
|
||||
// CHECK: prefetchw 485498096
|
||||
// CHECK: encoding: [0x0f,0x0d,0x0c,0x25,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetchw 485498096
|
||||
|
||||
// CHECK: prefetchw 64(%rdx)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x4a,0x40]
|
||||
prefetchw 64(%rdx)
|
||||
|
||||
// CHECK: prefetchw 64(%rdx,%rax,4)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x4c,0x82,0x40]
|
||||
prefetchw 64(%rdx,%rax,4)
|
||||
|
||||
// CHECK: prefetchw -64(%rdx,%rax,4)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x4c,0x82,0xc0]
|
||||
prefetchw -64(%rdx,%rax,4)
|
||||
|
||||
// CHECK: prefetchw 64(%rdx,%rax)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x4c,0x02,0x40]
|
||||
prefetchw 64(%rdx,%rax)
|
||||
|
||||
// CHECK: prefetchw (%rdx)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x0a]
|
||||
prefetchw (%rdx)
|
||||
|
||||
// CHECK: prefetchwt1 485498096
|
||||
// CHECK: encoding: [0x0f,0x0d,0x14,0x25,0xf0,0x1c,0xf0,0x1c]
|
||||
prefetchwt1 485498096
|
||||
|
||||
// CHECK: prefetchwt1 64(%rdx)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x52,0x40]
|
||||
prefetchwt1 64(%rdx)
|
||||
|
||||
// CHECK: prefetchwt1 64(%rdx,%rax,4)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x54,0x82,0x40]
|
||||
prefetchwt1 64(%rdx,%rax,4)
|
||||
|
||||
// CHECK: prefetchwt1 -64(%rdx,%rax,4)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x54,0x82,0xc0]
|
||||
prefetchwt1 -64(%rdx,%rax,4)
|
||||
|
||||
// CHECK: prefetchwt1 64(%rdx,%rax)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x54,0x02,0x40]
|
||||
prefetchwt1 64(%rdx,%rax)
|
||||
|
||||
// CHECK: prefetchwt1 (%rdx)
|
||||
// CHECK: encoding: [0x0f,0x0d,0x12]
|
||||
prefetchwt1 (%rdx)
|
||||
|
6
test/MC/X86/RDPMC-32.s
Normal file
6
test/MC/X86/RDPMC-32.s
Normal file
@ -0,0 +1,6 @@
|
||||
// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
|
||||
|
||||
// CHECK: rdpmc
|
||||
// CHECK: encoding: [0x0f,0x33]
|
||||
rdpmc
|
||||
|
6
test/MC/X86/RDPMC-64.s
Normal file
6
test/MC/X86/RDPMC-64.s
Normal file
@ -0,0 +1,6 @@
|
||||
// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
|
||||
|
||||
// CHECK: rdpmc
|
||||
// CHECK: encoding: [0x0f,0x33]
|
||||
rdpmc
|
||||
|
6
test/MC/X86/RDRAND-32.s
Normal file
6
test/MC/X86/RDRAND-32.s
Normal file
@ -0,0 +1,6 @@
|
||||
// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
|
||||
|
||||
// CHECK: rdrandl %eax
|
||||
// CHECK: encoding: [0x0f,0xc7,0xf0]
|
||||
rdrandl %eax
|
||||
|
14
test/MC/X86/RDRAND-64.s
Normal file
14
test/MC/X86/RDRAND-64.s
Normal file
@ -0,0 +1,14 @@
|
||||
// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
|
||||
|
||||
// CHECK: rdrandl %r13d
|
||||
// CHECK: encoding: [0x41,0x0f,0xc7,0xf5]
|
||||
rdrandl %r13d
|
||||
|
||||
// CHECK: rdrandq %r13
|
||||
// CHECK: encoding: [0x49,0x0f,0xc7,0xf5]
|
||||
rdrandq %r13
|
||||
|
||||
// CHECK: rdrandw %r13w
|
||||
// CHECK: encoding: [0x66,0x41,0x0f,0xc7,0xf5]
|
||||
rdrandw %r13w
|
||||
|
6
test/MC/X86/RDSEED-32.s
Normal file
6
test/MC/X86/RDSEED-32.s
Normal file
@ -0,0 +1,6 @@
|
||||
// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
|
||||
|
||||
// CHECK: rdseedl %eax
|
||||
// CHECK: encoding: [0x0f,0xc7,0xf8]
|
||||
rdseedl %eax
|
||||
|
14
test/MC/X86/RDSEED-64.s
Normal file
14
test/MC/X86/RDSEED-64.s
Normal file
@ -0,0 +1,14 @@
|
||||
// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
|
||||
|
||||
// CHECK: rdseedl %r13d
|
||||
// CHECK: encoding: [0x41,0x0f,0xc7,0xfd]
|
||||
rdseedl %r13d
|
||||
|
||||
// CHECK: rdseedq %r13
|
||||
// CHECK: encoding: [0x49,0x0f,0xc7,0xfd]
|
||||
rdseedq %r13
|
||||
|
||||
// CHECK: rdseedw %r13w
|
||||
// CHECK: encoding: [0x66,0x41,0x0f,0xc7,0xfd]
|
||||
rdseedw %r13w
|
||||
|
6
test/MC/X86/RDTSCP-32.s
Normal file
6
test/MC/X86/RDTSCP-32.s
Normal file
@ -0,0 +1,6 @@
|
||||
// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
|
||||
|
||||
// CHECK: rdtscp
|
||||
// CHECK: encoding: [0x0f,0x01,0xf9]
|
||||
rdtscp
|
||||
|
6
test/MC/X86/RDTSCP-64.s
Normal file
6
test/MC/X86/RDTSCP-64.s
Normal file
@ -0,0 +1,6 @@
|
||||
// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
|
||||
|
||||
// CHECK: rdtscp
|
||||
// CHECK: encoding: [0x0f,0x01,0xf9]
|
||||
rdtscp
|
||||
|
34
test/MC/X86/RDWRFSGS-64.s
Normal file
34
test/MC/X86/RDWRFSGS-64.s
Normal file
@ -0,0 +1,34 @@
|
||||
// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
|
||||
|
||||
// CHECK: rdfsbasel %r13d
|
||||
// CHECK: encoding: [0xf3,0x41,0x0f,0xae,0xc5]
|
||||
rdfsbasel %r13d
|
||||
|
||||
// CHECK: rdfsbaseq %r13
|
||||
// CHECK: encoding: [0xf3,0x49,0x0f,0xae,0xc5]
|
||||
rdfsbaseq %r13
|
||||
|
||||
// CHECK: rdgsbasel %r13d
|
||||
// CHECK: encoding: [0xf3,0x41,0x0f,0xae,0xcd]
|
||||
rdgsbasel %r13d
|
||||
|
||||
// CHECK: rdgsbaseq %r13
|
||||
// CHECK: encoding: [0xf3,0x49,0x0f,0xae,0xcd]
|
||||
rdgsbaseq %r13
|
||||
|
||||
// CHECK: wrfsbasel %r13d
|
||||
// CHECK: encoding: [0xf3,0x41,0x0f,0xae,0xd5]
|
||||
wrfsbasel %r13d
|
||||
|
||||
// CHECK: wrfsbaseq %r13
|
||||
// CHECK: encoding: [0xf3,0x49,0x0f,0xae,0xd5]
|
||||
wrfsbaseq %r13
|
||||
|
||||
// CHECK: wrgsbasel %r13d
|
||||
// CHECK: encoding: [0xf3,0x41,0x0f,0xae,0xdd]
|
||||
wrgsbasel %r13d
|
||||
|
||||
// CHECK: wrgsbaseq %r13
|
||||
// CHECK: encoding: [0xf3,0x49,0x0f,0xae,0xdd]
|
||||
wrgsbaseq %r13
|
||||
|
@ -99,6 +99,10 @@
|
||||
// CHECK: shll $2, %eax
|
||||
sall $2, %eax
|
||||
|
||||
// CHECK: rep movsb
|
||||
rep # comment
|
||||
movsb
|
||||
|
||||
// CHECK: rep
|
||||
// CHECK: insb
|
||||
rep;insb
|
||||
|
52
test/MC/X86/x86_64-asm-match.s
Normal file
52
test/MC/X86/x86_64-asm-match.s
Normal file
@ -0,0 +1,52 @@
|
||||
// RUN: llvm-mc -triple x86_64-unknown-unknown -debug-only=asm-matcher %s 2>&1 | FileCheck %s
|
||||
// REQUIRES: asserts
|
||||
|
||||
// CHECK: AsmMatcher: found 4 encodings with mnemonic 'pshufb'
|
||||
// CHECK:Trying to match opcode MMX_PSHUFBrr64
|
||||
// CHECK: Matching formal operand class MCK_VR64 against actual operand at index 1 (): Opcode result: multiple operand mismatches, ignoring this opcode
|
||||
// CHECK:Trying to match opcode PSHUFBrr
|
||||
// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 1 (): Opcode result: multiple operand mismatches, ignoring this opcode
|
||||
// CHECK:Trying to match opcode PSHUFBrm
|
||||
// CHECK: Matching formal operand class MCK_Mem128 against actual operand at index 1 (): match success using generic matcher
|
||||
// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 2 (): match success using generic matcher
|
||||
// CHECK: Matching formal operand class InvalidMatchClass against actual operand at index 3: actual operand index out of range Opcode result: complete match, selecting this opcode
|
||||
// CHECK:AsmMatcher: found 2 encodings with mnemonic 'sha1rnds4'
|
||||
// CHECK:Trying to match opcode SHA1RNDS4rri
|
||||
// CHECK: Matching formal operand class MCK_ImmUnsignedi8 against actual operand at index 1 (): match success using generic matcher
|
||||
// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 2 (): match success using generic matcher
|
||||
// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 3 (): match success using generic matcher
|
||||
// CHECK: Matching formal operand class InvalidMatchClass against actual operand at index 4: actual operand index out of range Opcode result: complete match, selecting this opcode
|
||||
// CHECK:AsmMatcher: found 4 encodings with mnemonic 'pinsrw'
|
||||
// CHECK:Trying to match opcode MMX_PINSRWirri
|
||||
// CHECK: Matching formal operand class MCK_ImmUnsignedi8 against actual operand at index 1 (): match success using generic matcher
|
||||
// CHECK: Matching formal operand class MCK_GR32orGR64 against actual operand at index 2 (): match success using generic matcher
|
||||
// CHECK: Matching formal operand class MCK_VR64 against actual operand at index 3 (): Opcode result: multiple operand mismatches, ignoring this opcode
|
||||
// CHECK:Trying to match opcode PINSRWrri
|
||||
// CHECK: Matching formal operand class MCK_ImmUnsignedi8 against actual operand at index 1 (): match success using generic matcher
|
||||
// CHECK: Matching formal operand class MCK_GR32orGR64 against actual operand at index 2 (): match success using generic matcher
|
||||
// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 3 (): match success using generic matcher
|
||||
// CHECK: Matching formal operand class InvalidMatchClass against actual operand at index 4: actual operand index out of range Opcode result: complete match, selecting this opcode
|
||||
// CHECK:AsmMatcher: found 2 encodings with mnemonic 'crc32l'
|
||||
// CHECK:Trying to match opcode CRC32r32r32
|
||||
// CHECK: Matching formal operand class MCK_GR32 against actual operand at index 1 (): Opcode result: multiple operand mismatches, ignoring this opcode
|
||||
// CHECK:Trying to match opcode CRC32r32m32
|
||||
// CHECK: Matching formal operand class MCK_Mem32 against actual operand at index 1 (): match success using generic matcher
|
||||
// CHECK: Matching formal operand class MCK_GR32 against actual operand at index 2 (): match success using generic matcher
|
||||
// CHECK: Matching formal operand class InvalidMatchClass against actual operand at index 3: actual operand index out of range Opcode result: complete match, selecting this opcode
|
||||
// CHECK:AsmMatcher: found 4 encodings with mnemonic 'punpcklbw'
|
||||
// CHECK:Trying to match opcode MMX_PUNPCKLBWirr
|
||||
// CHECK: Matching formal operand class MCK_VR64 against actual operand at index 1 (): match success using generic matcher
|
||||
// CHECK: Matching formal operand class MCK_VR64 against actual operand at index 2 (): Opcode result: multiple operand mismatches, ignoring this opcode
|
||||
// CHECK:Trying to match opcode MMX_PUNPCKLBWirm
|
||||
// CHECK: Matching formal operand class MCK_VR64 against actual operand at index 1 (): match success using generic matcher
|
||||
// CHECK: Matching formal operand class MCK_Mem64 against actual operand at index 2 (): match success using generic matcher
|
||||
// CHECK: Matching formal operand class InvalidMatchClass against actual operand at index 3: actual operand index out of range Opcode result: complete match, selecting this opcode
|
||||
|
||||
|
||||
pshufb CPI1_0(%rip), %xmm1
|
||||
sha1rnds4 $1, %xmm1, %xmm2
|
||||
pinsrw $3, %ecx, %xmm5
|
||||
crc32l %gs:0xdeadbeef(%rbx,%rcx,8),%ecx
|
||||
|
||||
.intel_syntax
|
||||
punpcklbw mm0, qword ptr [rsp]
|
@ -0,0 +1,26 @@
|
||||
; XFAIL: *
|
||||
; RUN: opt -safepoint-ir-verifier-print-only -verify-safepoint-ir -S %s 2>&1 | FileCheck %s
|
||||
|
||||
; In %merge %val.unrelocated, %ptr and %arg should be unrelocated.
|
||||
; FIXME: if this test fails it is a false-positive alarm. IR is correct.
|
||||
define void @test.unrelocated-phi.ok(i8 addrspace(1)* %arg) gc "statepoint-example" {
|
||||
; CHECK-LABEL: Verifying gc pointers in function: test.unrelocated-phi.ok
|
||||
bci_0:
|
||||
%ptr = getelementptr i8, i8 addrspace(1)* %arg, i64 4
|
||||
br i1 undef, label %left, label %right
|
||||
|
||||
left:
|
||||
%safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
|
||||
br label %merge
|
||||
|
||||
right:
|
||||
br label %merge
|
||||
|
||||
merge:
|
||||
; CHECK: No illegal uses found by SafepointIRVerifier in: test.unrelocated-phi.ok
|
||||
%val.unrelocated = phi i8 addrspace(1)* [ %arg, %left ], [ %ptr, %right ]
|
||||
%c = icmp eq i8 addrspace(1)* %val.unrelocated, %arg
|
||||
ret void
|
||||
}
|
||||
|
||||
declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
|
@ -1,8 +1,9 @@
|
||||
; RUN: opt %s -safepoint-ir-verifier-print-only -verify-safepoint-ir -S 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK: Illegal use of unrelocated value found!
|
||||
; CHECK-NEXT: Def: %base_phi3 = phi %jObject addrspace(1)* [ %obj609.relocated, %not_zero146 ], [ %base_phi2, %bci_37-aload ], !is_base_value !0
|
||||
; CHECK-NEXT: Use: %base_phi2 = phi %jObject addrspace(1)* [ %base_phi3, %not_zero179 ], [ %cast5, %bci_0 ], !is_base_value !0
|
||||
; CHECK-NEXT: Def: %base_phi4 = phi %jObject addrspace(1)* addrspace(1)* [ %addr98.relocated, %not_zero146 ], [ %cast6, %bci_37-aload ], !is_base_value !0
|
||||
; CHECK-NEXT: Use: %safepoint_token = tail call token (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, %jObject addrspace(1)* %base_phi1, %jObject addrspace(1)* addrspace(1)* %base_phi4, %jObject addrspace(1)* addrspace(1)* %relocated4, %jObject addrspace(1)* %relocated7)
|
||||
|
||||
|
||||
%jObject = type { [8 x i8] }
|
||||
|
||||
|
@ -14,9 +14,9 @@ define i8 addrspace(1)* @test.not.ok.0(i8 addrspace(1)* %arg) gc "statepoint-exa
|
||||
|
||||
merge:
|
||||
; CHECK: Illegal use of unrelocated value found!
|
||||
; CHECK-NEXT: Def: i8 addrspace(1)* %arg
|
||||
; CHECK-NEXT: Use: %val = phi i8 addrspace(1)* [ %arg, %left ], [ %arg, %right ]
|
||||
%val = phi i8 addrspace(1)* [ %arg, %left ], [ %arg, %right]
|
||||
; CHECK-NEXT: Def: %val = phi i8 addrspace(1)* [ %arg, %left ], [ %arg, %right ]
|
||||
; CHECK-NEXT: Use: ret i8 addrspace(1)* %val
|
||||
%val = phi i8 addrspace(1)* [ %arg, %left ], [ %arg, %right ]
|
||||
ret i8 addrspace(1)* %val
|
||||
}
|
||||
|
||||
@ -34,9 +34,9 @@ define i8 addrspace(1)* @test.not.ok.1(i8 addrspace(1)* %arg) gc "statepoint-exa
|
||||
|
||||
merge:
|
||||
; CHECK: Illegal use of unrelocated value found!
|
||||
; CHECK-NEXT: Def: i8 addrspace(1)* %arg
|
||||
; CHECK-NEXT: Use: %val = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right ]
|
||||
%val = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right]
|
||||
; CHECK-NEXT: Def: %val = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right ]
|
||||
; CHECK-NEXT: Use: ret i8 addrspace(1)* %val
|
||||
%val = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right ]
|
||||
ret i8 addrspace(1)* %val
|
||||
}
|
||||
|
||||
@ -74,5 +74,99 @@ define i8 addrspace(1)* @test.ok.1(i8 addrspace(1)* %arg) gc "statepoint-example
|
||||
ret i8 addrspace(1)* %val
|
||||
}
|
||||
|
||||
; It should be allowed to compare poisoned ptr with null.
|
||||
define void @test.poisoned.cmp.ok(i8 addrspace(1)* %arg) gc "statepoint-example" {
|
||||
; CHECK-LABEL: Verifying gc pointers in function: test.poisoned.cmp.ok
|
||||
bci_0:
|
||||
br i1 undef, label %left, label %right
|
||||
|
||||
left:
|
||||
%safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0)
|
||||
%arg.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 7, i32 7) ; arg, arg
|
||||
br label %merge
|
||||
|
||||
right:
|
||||
%safepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0)
|
||||
br label %merge
|
||||
|
||||
merge:
|
||||
; CHECK: No illegal uses found by SafepointIRVerifier in: test.poisoned.cmp.ok
|
||||
%val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ]
|
||||
%c = icmp eq i8 addrspace(1)* %val.poisoned, null
|
||||
ret void
|
||||
}
|
||||
|
||||
; It is illegal to compare poisoned ptr and relocated.
|
||||
define void @test.poisoned.cmp.fail.0(i8 addrspace(1)* %arg) gc "statepoint-example" {
|
||||
; CHECK-LABEL: Verifying gc pointers in function: test.poisoned.cmp.fail.0
|
||||
bci_0:
|
||||
br i1 undef, label %left, label %right
|
||||
|
||||
left:
|
||||
%safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0)
|
||||
%arg.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 7, i32 7) ; arg, arg
|
||||
br label %merge
|
||||
|
||||
right:
|
||||
%safepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0)
|
||||
%arg.relocated2 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token2, i32 7, i32 7) ; arg, arg
|
||||
br label %merge
|
||||
|
||||
merge:
|
||||
; CHECK: Illegal use of unrelocated value found!
|
||||
; CHECK-NEXT: Def: %val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ]
|
||||
; CHECK-NEXT: Use: %c = icmp eq i8 addrspace(1)* %val.poisoned, %val
|
||||
%val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ]
|
||||
%val = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg.relocated2, %right ]
|
||||
%c = icmp eq i8 addrspace(1)* %val.poisoned, %val
|
||||
ret void
|
||||
}
|
||||
|
||||
; It is illegal to compare poisoned ptr and unrelocated.
|
||||
define void @test.poisoned.cmp.fail.1(i8 addrspace(1)* %arg) gc "statepoint-example" {
|
||||
; CHECK-LABEL: Verifying gc pointers in function: test.poisoned.cmp.fail.1
|
||||
bci_0:
|
||||
br i1 undef, label %left, label %right
|
||||
|
||||
left:
|
||||
%safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0)
|
||||
%arg.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 7, i32 7) ; arg, arg
|
||||
br label %merge
|
||||
|
||||
right:
|
||||
%safepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0)
|
||||
%arg.relocated2 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token2, i32 7, i32 7) ; arg, arg
|
||||
br label %merge
|
||||
|
||||
merge:
|
||||
; CHECK: Illegal use of unrelocated value found!
|
||||
; CHECK-NEXT: Def: %val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ]
|
||||
; CHECK-NEXT: Use: %c = icmp eq i8 addrspace(1)* %val.poisoned, %arg
|
||||
%val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ]
|
||||
%c = icmp eq i8 addrspace(1)* %val.poisoned, %arg
|
||||
ret void
|
||||
}
|
||||
|
||||
; It should be allowed to compare unrelocated phi with unrelocated value.
|
||||
define void @test.unrelocated-phi.cmp.ok(i8 addrspace(1)* %arg) gc "statepoint-example" {
|
||||
; CHECK-LABEL: Verifying gc pointers in function: test.unrelocated-phi.cmp.ok
|
||||
bci_0:
|
||||
br i1 undef, label %left, label %right
|
||||
|
||||
left:
|
||||
%safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
|
||||
br label %merge
|
||||
|
||||
right:
|
||||
br label %merge
|
||||
|
||||
merge:
|
||||
; CHECK: No illegal uses found by SafepointIRVerifier in: test.unrelocated-phi.cmp.ok
|
||||
%val.unrelocated = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right ]
|
||||
%c = icmp eq i8 addrspace(1)* %val.unrelocated, %arg
|
||||
ret void
|
||||
}
|
||||
|
||||
declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
|
||||
declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32)
|
||||
declare void @not_statepoint()
|
||||
|
8
test/ThinLTO/X86/Inputs/noinline.ll
Normal file
8
test/ThinLTO/X86/Inputs/noinline.ll
Normal file
@ -0,0 +1,8 @@
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-pc-linux-gnu"
|
||||
|
||||
define i32 @foo(i32) local_unnamed_addr #0 {
|
||||
ret i32 10
|
||||
}
|
||||
|
||||
attributes #0 = { noinline }
|
26
test/ThinLTO/X86/noinline.ll
Normal file
26
test/ThinLTO/X86/noinline.ll
Normal file
@ -0,0 +1,26 @@
|
||||
; This test checks that ThinLTO doesn't try to import a noinline function
|
||||
; which, when it takes place, causes promotion of its callee.
|
||||
; RUN: opt -module-summary %s -o %t1.bc
|
||||
; RUN: opt -module-summary %p/Inputs/noinline.ll -o %t2.bc
|
||||
; RUN: llvm-lto2 run %t1.bc %t2.bc -o %t3.o \
|
||||
; RUN: -save-temps \
|
||||
; RUN: -r=%t1.bc,main,px \
|
||||
; RUN: -r=%t1.bc,foo, \
|
||||
; RUN: -r=%t2.bc,foo,p
|
||||
|
||||
; RUN: llvm-dis %t3.o.1.3.import.bc -o - | FileCheck %s
|
||||
|
||||
; CHECK-NOT: define available_externally i32 @foo
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-pc-linux-gnu"
|
||||
|
||||
; Function Attrs: nounwind ssp uwtable
|
||||
define i32 @main(i32, i8** nocapture readnone) local_unnamed_addr #0 {
|
||||
%3 = tail call i32 @foo(i32 %0) #0
|
||||
ret i32 %3
|
||||
}
|
||||
|
||||
declare i32 @foo(i32) local_unnamed_addr
|
||||
|
||||
attributes #0 = { nounwind }
|
@ -122,6 +122,19 @@ entry:
|
||||
}
|
||||
|
||||
|
||||
define i8 @sub_compare_folding_swapPD256_undef(<4 x double> %a, <4 x double> %b) {
|
||||
; CHECK-LABEL: @sub_compare_folding_swapPD256_undef(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[TMP:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> fsub (<4 x double> undef, <4 x double> undef), <4 x double> zeroinitializer, i32 5, i8 -1)
|
||||
; CHECK-NEXT: ret i8 [[TMP]]
|
||||
;
|
||||
entry:
|
||||
%sub.i1 = fsub ninf <4 x double> undef, undef
|
||||
%tmp = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5, i8 -1)
|
||||
ret i8 %tmp
|
||||
}
|
||||
|
||||
|
||||
define i8 @sub_compare_folding_swapPD512(<8 x double> %a, <8 x double> %b){
|
||||
; CHECK-LABEL: @sub_compare_folding_swapPD512(
|
||||
; CHECK-NEXT: entry:
|
||||
|
11
test/Transforms/InstCombine/extractelement.ll
Normal file
11
test/Transforms/InstCombine/extractelement.ll
Normal file
@ -0,0 +1,11 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -instcombine -S | FileCheck %s
|
||||
|
||||
define i32 @extractelement_out_of_range(<2 x i32> %x) {
|
||||
; CHECK-LABEL: @extractelement_out_of_range(
|
||||
; CHECK-NEXT: [[E1:%.*]] = extractelement <2 x i32> [[X:%.*]], i8 16
|
||||
; CHECK-NEXT: ret i32 [[E1]]
|
||||
;
|
||||
%E1 = extractelement <2 x i32> %x, i8 16
|
||||
ret i32 %E1
|
||||
}
|
@ -267,12 +267,17 @@ define void @powi(double %V, double *%P) {
|
||||
|
||||
%C = tail call double @llvm.powi.f64(double %V, i32 1) nounwind
|
||||
store volatile double %C, double* %P
|
||||
|
||||
%D = tail call double @llvm.powi.f64(double %V, i32 2) nounwind
|
||||
store volatile double %D, double* %P
|
||||
ret void
|
||||
; CHECK-LABEL: @powi(
|
||||
; CHECK: %A = fdiv double 1.0{{.*}}, %V
|
||||
; CHECK: store volatile double %A,
|
||||
; CHECK: store volatile double 1.0
|
||||
; CHECK: store volatile double %V
|
||||
; CHECK: %D = fmul double %V, %V
|
||||
; CHECK: store volatile double %D
|
||||
}
|
||||
|
||||
define i32 @cttz(i32 %a) {
|
||||
|
@ -744,3 +744,158 @@ define <2 x i8> @min_through_cast_vec2(<2 x i32> %x) {
|
||||
%res = select <2 x i1> %cmp, <2 x i8> %x_trunc, <2 x i8> <i8 255, i8 255>
|
||||
ret <2 x i8> %res
|
||||
}
|
||||
|
||||
; Remove a min/max op in a sequence with a common operand.
|
||||
; PR35717: https://bugs.llvm.org/show_bug.cgi?id=35717
|
||||
|
||||
; min(min(a, b), min(b, c)) --> min(min(a, b), c)
|
||||
|
||||
define i32 @common_factor_smin(i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: @common_factor_smin(
|
||||
; CHECK-NEXT: [[CMP_AB:%.*]] = icmp slt i32 %a, %b
|
||||
; CHECK-NEXT: [[MIN_AB:%.*]] = select i1 [[CMP_AB]], i32 %a, i32 %b
|
||||
; CHECK-NEXT: [[CMP_BC:%.*]] = icmp slt i32 %b, %c
|
||||
; CHECK-NEXT: [[MIN_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c
|
||||
; CHECK-NEXT: [[CMP_AB_BC:%.*]] = icmp slt i32 [[MIN_AB]], [[MIN_BC]]
|
||||
; CHECK-NEXT: [[MIN_ABC:%.*]] = select i1 [[CMP_AB_BC]], i32 [[MIN_AB]], i32 [[MIN_BC]]
|
||||
; CHECK-NEXT: ret i32 [[MIN_ABC]]
|
||||
;
|
||||
%cmp_ab = icmp slt i32 %a, %b
|
||||
%min_ab = select i1 %cmp_ab, i32 %a, i32 %b
|
||||
%cmp_bc = icmp slt i32 %b, %c
|
||||
%min_bc = select i1 %cmp_bc, i32 %b, i32 %c
|
||||
%cmp_ab_bc = icmp slt i32 %min_ab, %min_bc
|
||||
%min_abc = select i1 %cmp_ab_bc, i32 %min_ab, i32 %min_bc
|
||||
ret i32 %min_abc
|
||||
}
|
||||
|
||||
; max(max(a, b), max(c, b)) --> max(max(a, b), c)
|
||||
|
||||
define <2 x i32> @common_factor_smax(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
|
||||
; CHECK-LABEL: @common_factor_smax(
|
||||
; CHECK-NEXT: [[CMP_AB:%.*]] = icmp sgt <2 x i32> %a, %b
|
||||
; CHECK-NEXT: [[MAX_AB:%.*]] = select <2 x i1> [[CMP_AB]], <2 x i32> %a, <2 x i32> %b
|
||||
; CHECK-NEXT: [[CMP_CB:%.*]] = icmp sgt <2 x i32> %c, %b
|
||||
; CHECK-NEXT: [[MAX_CB:%.*]] = select <2 x i1> [[CMP_CB]], <2 x i32> %c, <2 x i32> %b
|
||||
; CHECK-NEXT: [[CMP_AB_CB:%.*]] = icmp sgt <2 x i32> [[MAX_AB]], [[MAX_CB]]
|
||||
; CHECK-NEXT: [[MAX_ABC:%.*]] = select <2 x i1> [[CMP_AB_CB]], <2 x i32> [[MAX_AB]], <2 x i32> [[MAX_CB]]
|
||||
; CHECK-NEXT: ret <2 x i32> [[MAX_ABC]]
|
||||
;
|
||||
%cmp_ab = icmp sgt <2 x i32> %a, %b
|
||||
%max_ab = select <2 x i1> %cmp_ab, <2 x i32> %a, <2 x i32> %b
|
||||
%cmp_cb = icmp sgt <2 x i32> %c, %b
|
||||
%max_cb = select <2 x i1> %cmp_cb, <2 x i32> %c, <2 x i32> %b
|
||||
%cmp_ab_cb = icmp sgt <2 x i32> %max_ab, %max_cb
|
||||
%max_abc = select <2 x i1> %cmp_ab_cb, <2 x i32> %max_ab, <2 x i32> %max_cb
|
||||
ret <2 x i32> %max_abc
|
||||
}
|
||||
|
||||
; min(min(b, c), min(a, b)) --> min(min(b, c), a)
|
||||
|
||||
define <2 x i32> @common_factor_umin(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
|
||||
; CHECK-LABEL: @common_factor_umin(
|
||||
; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ult <2 x i32> %b, %c
|
||||
; CHECK-NEXT: [[MIN_BC:%.*]] = select <2 x i1> [[CMP_BC]], <2 x i32> %b, <2 x i32> %c
|
||||
; CHECK-NEXT: [[CMP_AB:%.*]] = icmp ult <2 x i32> %a, %b
|
||||
; CHECK-NEXT: [[MIN_AB:%.*]] = select <2 x i1> [[CMP_AB]], <2 x i32> %a, <2 x i32> %b
|
||||
; CHECK-NEXT: [[CMP_BC_AB:%.*]] = icmp ult <2 x i32> [[MIN_BC]], [[MIN_AB]]
|
||||
; CHECK-NEXT: [[MIN_ABC:%.*]] = select <2 x i1> [[CMP_BC_AB]], <2 x i32> [[MIN_BC]], <2 x i32> [[MIN_AB]]
|
||||
; CHECK-NEXT: ret <2 x i32> [[MIN_ABC]]
|
||||
;
|
||||
%cmp_bc = icmp ult <2 x i32> %b, %c
|
||||
%min_bc = select <2 x i1> %cmp_bc, <2 x i32> %b, <2 x i32> %c
|
||||
%cmp_ab = icmp ult <2 x i32> %a, %b
|
||||
%min_ab = select <2 x i1> %cmp_ab, <2 x i32> %a, <2 x i32> %b
|
||||
%cmp_bc_ab = icmp ult <2 x i32> %min_bc, %min_ab
|
||||
%min_abc = select <2 x i1> %cmp_bc_ab, <2 x i32> %min_bc, <2 x i32> %min_ab
|
||||
ret <2 x i32> %min_abc
|
||||
}
|
||||
|
||||
; max(max(b, c), max(b, a)) --> max(max(b, c), a)
|
||||
|
||||
define i32 @common_factor_umax(i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: @common_factor_umax(
|
||||
; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ugt i32 %b, %c
|
||||
; CHECK-NEXT: [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c
|
||||
; CHECK-NEXT: [[CMP_BA:%.*]] = icmp ugt i32 %b, %a
|
||||
; CHECK-NEXT: [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 %b, i32 %a
|
||||
; CHECK-NEXT: [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]]
|
||||
; CHECK-NEXT: [[MAX_ABC:%.*]] = select i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]]
|
||||
; CHECK-NEXT: ret i32 [[MAX_ABC]]
|
||||
;
|
||||
%cmp_bc = icmp ugt i32 %b, %c
|
||||
%max_bc = select i1 %cmp_bc, i32 %b, i32 %c
|
||||
%cmp_ba = icmp ugt i32 %b, %a
|
||||
%max_ba = select i1 %cmp_ba, i32 %b, i32 %a
|
||||
%cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba
|
||||
%max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba
|
||||
ret i32 %max_abc
|
||||
}
|
||||
|
||||
declare void @extra_use(i32)
|
||||
|
||||
define i32 @common_factor_umax_extra_use_lhs(i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: @common_factor_umax_extra_use_lhs(
|
||||
; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ugt i32 %b, %c
|
||||
; CHECK-NEXT: [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c
|
||||
; CHECK-NEXT: [[CMP_BA:%.*]] = icmp ugt i32 %b, %a
|
||||
; CHECK-NEXT: [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 %b, i32 %a
|
||||
; CHECK-NEXT: [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]]
|
||||
; CHECK-NEXT: [[MAX_ABC:%.*]] = select i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]]
|
||||
; CHECK-NEXT: call void @extra_use(i32 [[MAX_BC]])
|
||||
; CHECK-NEXT: ret i32 [[MAX_ABC]]
|
||||
;
|
||||
%cmp_bc = icmp ugt i32 %b, %c
|
||||
%max_bc = select i1 %cmp_bc, i32 %b, i32 %c
|
||||
%cmp_ba = icmp ugt i32 %b, %a
|
||||
%max_ba = select i1 %cmp_ba, i32 %b, i32 %a
|
||||
%cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba
|
||||
%max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba
|
||||
call void @extra_use(i32 %max_bc)
|
||||
ret i32 %max_abc
|
||||
}
|
||||
|
||||
define i32 @common_factor_umax_extra_use_rhs(i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: @common_factor_umax_extra_use_rhs(
|
||||
; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ugt i32 %b, %c
|
||||
; CHECK-NEXT: [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c
|
||||
; CHECK-NEXT: [[CMP_BA:%.*]] = icmp ugt i32 %b, %a
|
||||
; CHECK-NEXT: [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 %b, i32 %a
|
||||
; CHECK-NEXT: [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]]
|
||||
; CHECK-NEXT: [[MAX_ABC:%.*]] = select i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]]
|
||||
; CHECK-NEXT: call void @extra_use(i32 [[MAX_BA]])
|
||||
; CHECK-NEXT: ret i32 [[MAX_ABC]]
|
||||
;
|
||||
%cmp_bc = icmp ugt i32 %b, %c
|
||||
%max_bc = select i1 %cmp_bc, i32 %b, i32 %c
|
||||
%cmp_ba = icmp ugt i32 %b, %a
|
||||
%max_ba = select i1 %cmp_ba, i32 %b, i32 %a
|
||||
%cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba
|
||||
%max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba
|
||||
call void @extra_use(i32 %max_ba)
|
||||
ret i32 %max_abc
|
||||
}
|
||||
|
||||
define i32 @common_factor_umax_extra_use_both(i32 %a, i32 %b, i32 %c) {
|
||||
; CHECK-LABEL: @common_factor_umax_extra_use_both(
|
||||
; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ugt i32 %b, %c
|
||||
; CHECK-NEXT: [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c
|
||||
; CHECK-NEXT: [[CMP_BA:%.*]] = icmp ugt i32 %b, %a
|
||||
; CHECK-NEXT: [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 %b, i32 %a
|
||||
; CHECK-NEXT: [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]]
|
||||
; CHECK-NEXT: [[MAX_ABC:%.*]] = select i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]]
|
||||
; CHECK-NEXT: call void @extra_use(i32 [[MAX_BC]])
|
||||
; CHECK-NEXT: call void @extra_use(i32 [[MAX_BA]])
|
||||
; CHECK-NEXT: ret i32 [[MAX_ABC]]
|
||||
;
|
||||
%cmp_bc = icmp ugt i32 %b, %c
|
||||
%max_bc = select i1 %cmp_bc, i32 %b, i32 %c
|
||||
%cmp_ba = icmp ugt i32 %b, %a
|
||||
%max_ba = select i1 %cmp_ba, i32 %b, i32 %a
|
||||
%cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba
|
||||
%max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba
|
||||
call void @extra_use(i32 %max_bc)
|
||||
call void @extra_use(i32 %max_ba)
|
||||
ret i32 %max_abc
|
||||
}
|
||||
|
||||
|
@ -155,13 +155,13 @@ define i8 @t13(float %a) {
|
||||
ret i8 %3
|
||||
}
|
||||
|
||||
; <= comparison, where %a could be -0.0. Not safe.
|
||||
; %a could be -0.0, but it doesn't matter because the conversion to int is the same for 0.0 or -0.0.
|
||||
define i8 @t14(float %a) {
|
||||
; CHECK-LABEL: @t14(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule float %a, 0.000000e+00
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fptosi float %a to i8
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 0
|
||||
; CHECK-NEXT: ret i8 [[TMP3]]
|
||||
; CHECK-NEXT: [[DOTINV:%.*]] = fcmp oge float %a, 0.000000e+00
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], float 0.000000e+00, float %a
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i8
|
||||
; CHECK-NEXT: ret i8 [[TMP2]]
|
||||
;
|
||||
%1 = fcmp ule float %a, 0.0
|
||||
%2 = fptosi float %a to i8
|
||||
@ -169,6 +169,19 @@ define i8 @t14(float %a) {
|
||||
ret i8 %3
|
||||
}
|
||||
|
||||
define i8 @t14_commute(float %a) {
|
||||
; CHECK-LABEL: @t14_commute(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt float %a, 0.000000e+00
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], float %a, float 0.000000e+00
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = fptosi float [[TMP2]] to i8
|
||||
; CHECK-NEXT: ret i8 [[TMP3]]
|
||||
;
|
||||
%1 = fcmp ule float %a, 0.0
|
||||
%2 = fptosi float %a to i8
|
||||
%3 = select i1 %1, i8 0, i8 %2
|
||||
ret i8 %3
|
||||
}
|
||||
|
||||
define i8 @t15(float %a) {
|
||||
; CHECK-LABEL: @t15(
|
||||
; CHECK-NEXT: [[DOTINV:%.*]] = fcmp nsz oge float %a, 0.000000e+00
|
||||
|
@ -593,3 +593,17 @@ define <2 x i32> @test23(<2 x i32> %A) {
|
||||
%mul = srem <2 x i32> %and, <i32 2147483647, i32 2147483647>
|
||||
ret <2 x i32> %mul
|
||||
}
|
||||
|
||||
; FP division-by-zero is not UB.
|
||||
|
||||
define double @PR34870(i1 %cond, double %x, double %y) {
|
||||
; CHECK-LABEL: @PR34870(
|
||||
; CHECK-NEXT: [[SEL:%.*]] = select i1 %cond, double %y, double 0.000000e+00
|
||||
; CHECK-NEXT: [[FMOD:%.*]] = frem double %x, [[SEL]]
|
||||
; CHECK-NEXT: ret double [[FMOD]]
|
||||
;
|
||||
%sel = select i1 %cond, double %y, double 0.0
|
||||
%fmod = frem double %x, %sel
|
||||
ret double %fmod
|
||||
}
|
||||
|
||||
|
13
test/Transforms/InstSimplify/extract-element.ll
Normal file
13
test/Transforms/InstSimplify/extract-element.ll
Normal file
@ -0,0 +1,13 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -instsimplify -S | FileCheck %s
|
||||
|
||||
; Weird Types
|
||||
|
||||
define i129 @vec_extract_negidx(<3 x i129> %a) {
|
||||
; CHECK-LABEL: @vec_extract_negidx(
|
||||
; CHECK-NEXT: [[E1:%.*]] = extractelement <3 x i129> [[A:%.*]], i129 -1
|
||||
; CHECK-NEXT: ret i129 [[E1]]
|
||||
;
|
||||
%E1 = extractelement <3 x i129> %a, i129 -1
|
||||
ret i129 %E1
|
||||
}
|
128
test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll
Normal file
128
test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll
Normal file
@ -0,0 +1,128 @@
|
||||
; RUN: opt -loop-unroll -unroll-runtime -unroll-runtime-epilog -S %s | FileCheck %s
|
||||
|
||||
; Test that epilogue is tagged with the same debug information as original loop body rather than original loop exit.
|
||||
|
||||
; CHECK: for.body.i:
|
||||
; CHECK: br i1 {{.*}}, label %lee1.exit.loopexit.unr-lcssa.loopexit, label %for.body.i, !dbg ![[LOOP_LOC:[0-9]+]]
|
||||
; CHECK: lee1.exit.loopexit.unr-lcssa.loopexit:
|
||||
; CHECK: br label %lee1.exit.loopexit.unr-lcssa, !dbg ![[LOOP_LOC]]
|
||||
; CHECK: lee1.exit.loopexit.unr-lcssa:
|
||||
; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0, !dbg ![[LOOP_LOC]]
|
||||
; CHECK: br i1 %lcmp.mod, label %for.body.i.epil.preheader, label %lee1.exit.loopexit, !dbg ![[LOOP_LOC]]
|
||||
; CHECK: for.body.i.epil.preheader:
|
||||
; CHECK: br label %for.body.i.epil, !dbg ![[LOOP_LOC]]
|
||||
; CHECK: lee1.exit.loopexit:
|
||||
; CHECK: br label %lee1.exit, !dbg ![[EXIT_LOC:[0-9]+]]
|
||||
|
||||
; CHECK-DAG: ![[LOOP_LOC]] = !DILocation(line: 5, column: 3, scope: !{{.*}}, inlinedAt: !{{.*}})
|
||||
; CHECK-DAG: ![[EXIT_LOC]] = !DILocation(line: 11, column: 12, scope: !{{.*}}, inlinedAt: !{{.*}})
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
define i32 @goo(i32 %a, i32 %b) local_unnamed_addr #0 !dbg !8 {
|
||||
entry:
|
||||
tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !13, metadata !15), !dbg !16
|
||||
tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !14, metadata !15), !dbg !17
|
||||
tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !18, metadata !15), !dbg !26
|
||||
tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !21, metadata !15), !dbg !28
|
||||
tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !22, metadata !15), !dbg !29
|
||||
tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !23, metadata !15), !dbg !30
|
||||
tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !23, metadata !15), !dbg !30
|
||||
tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !22, metadata !15), !dbg !29
|
||||
%cmp7.i = icmp eq i32 %b, 0, !dbg !31
|
||||
br i1 %cmp7.i, label %lee1.exit, label %for.body.i.preheader, !dbg !33
|
||||
|
||||
for.body.i.preheader: ; preds = %entry
|
||||
br label %for.body.i, !dbg !34
|
||||
|
||||
for.body.i: ; preds = %for.body.i.preheader, %for.body.i
|
||||
%i.09.i = phi i32 [ %inc.i, %for.body.i ], [ 0, %for.body.i.preheader ]
|
||||
%t.08.i = phi i32 [ %add1.i, %for.body.i ], [ 0, %for.body.i.preheader ]
|
||||
%div.i = sdiv i32 %t.08.i, 2, !dbg !34
|
||||
%add.i = add i32 %t.08.i, %a, !dbg !35
|
||||
%add1.i = add i32 %add.i, %div.i, !dbg !36
|
||||
tail call void @llvm.dbg.value(metadata i32 %add1.i, i64 0, metadata !22, metadata !15), !dbg !29
|
||||
%inc.i = add nuw i32 %i.09.i, 1, !dbg !37
|
||||
tail call void @llvm.dbg.value(metadata i32 %inc.i, i64 0, metadata !23, metadata !15), !dbg !30
|
||||
tail call void @llvm.dbg.value(metadata i32 %inc.i, i64 0, metadata !23, metadata !15), !dbg !30
|
||||
tail call void @llvm.dbg.value(metadata i32 %add1.i, i64 0, metadata !22, metadata !15), !dbg !29
|
||||
%exitcond.i = icmp eq i32 %inc.i, %b, !dbg !31
|
||||
br i1 %exitcond.i, label %lee1.exit.loopexit, label %for.body.i, !dbg !33, !llvm.loop !38
|
||||
|
||||
lee1.exit.loopexit: ; preds = %for.body.i
|
||||
%add1.i.lcssa = phi i32 [ %add1.i, %for.body.i ]
|
||||
br label %lee1.exit, !dbg !41
|
||||
|
||||
lee1.exit: ; preds = %lee1.exit.loopexit, %entry
|
||||
%t.0.lcssa.i = phi i32 [ 0, %entry ], [ %add1.i.lcssa, %lee1.exit.loopexit ]
|
||||
tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !44, metadata !15), !dbg !47
|
||||
tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !45, metadata !15), !dbg !48
|
||||
%add.i4 = add nsw i32 %b, %a, !dbg !41
|
||||
%sub.i = sub nsw i32 %a, %b, !dbg !49
|
||||
%mul.i = mul nsw i32 %add.i4, %sub.i, !dbg !50
|
||||
%add = add nsw i32 %t.0.lcssa.i, %mul.i, !dbg !51
|
||||
ret i32 %add, !dbg !52
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
|
||||
|
||||
attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" "target-features"="+neon,+strict-align,+vfp3,-crypto,-d16,-fp-armv8,-fp-only-sp,-fp16,-vfp4" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!3, !4, !5, !6}
|
||||
!llvm.ident = !{!7}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Snapdragon LLVM ARM Compiler 4.0.5 (based on llvm.org 4.0+)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
|
||||
!1 = !DIFile(filename: "t.c", directory: "/prj/llvm-arm/scratch1/zhaoshiz/bugs/debug-symbol")
|
||||
!2 = !{}
|
||||
!3 = !{i32 2, !"Dwarf Version", i32 4}
|
||||
!4 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!5 = !{i32 1, !"wchar_size", i32 4}
|
||||
!6 = !{i32 1, !"min_enum_size", i32 4}
|
||||
!7 = !{!"Snapdragon LLVM ARM Compiler 4.0.5 (based on llvm.org 4.0+)"}
|
||||
!8 = distinct !DISubprogram(name: "goo", scope: !1, file: !1, line: 23, type: !9, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !12)
|
||||
!9 = !DISubroutineType(types: !10)
|
||||
!10 = !{!11, !11, !11}
|
||||
!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
|
||||
!12 = !{!13, !14}
|
||||
!13 = !DILocalVariable(name: "a", arg: 1, scope: !8, file: !1, line: 23, type: !11)
|
||||
!14 = !DILocalVariable(name: "b", arg: 2, scope: !8, file: !1, line: 23, type: !11)
|
||||
!15 = !DIExpression()
|
||||
!16 = !DILocation(line: 23, column: 14, scope: !8)
|
||||
!17 = !DILocation(line: 23, column: 21, scope: !8)
|
||||
!18 = !DILocalVariable(name: "a", arg: 1, scope: !19, file: !1, line: 3, type: !11)
|
||||
!19 = distinct !DISubprogram(name: "lee1", scope: !1, file: !1, line: 3, type: !9, isLocal: true, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !20)
|
||||
!20 = !{!18, !21, !22, !23}
|
||||
!21 = !DILocalVariable(name: "b", arg: 2, scope: !19, file: !1, line: 3, type: !11)
|
||||
!22 = !DILocalVariable(name: "t", scope: !19, file: !1, line: 4, type: !11)
|
||||
!23 = !DILocalVariable(name: "i", scope: !24, file: !1, line: 5, type: !25)
|
||||
!24 = distinct !DILexicalBlock(scope: !19, file: !1, line: 5, column: 3)
|
||||
!25 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
|
||||
!26 = !DILocation(line: 3, column: 22, scope: !19, inlinedAt: !27)
|
||||
!27 = distinct !DILocation(line: 24, column: 27, scope: !8)
|
||||
!28 = !DILocation(line: 3, column: 29, scope: !19, inlinedAt: !27)
|
||||
!29 = !DILocation(line: 4, column: 7, scope: !19, inlinedAt: !27)
|
||||
!30 = !DILocation(line: 5, column: 17, scope: !24, inlinedAt: !27)
|
||||
!31 = !DILocation(line: 5, column: 23, scope: !32, inlinedAt: !27)
|
||||
!32 = distinct !DILexicalBlock(scope: !24, file: !1, line: 5, column: 3)
|
||||
!33 = !DILocation(line: 5, column: 3, scope: !24, inlinedAt: !27)
|
||||
!34 = !DILocation(line: 6, column: 13, scope: !32, inlinedAt: !27)
|
||||
!35 = !DILocation(line: 6, column: 11, scope: !32, inlinedAt: !27)
|
||||
!36 = !DILocation(line: 6, column: 7, scope: !32, inlinedAt: !27)
|
||||
!37 = !DILocation(line: 5, column: 28, scope: !32, inlinedAt: !27)
|
||||
!38 = distinct !{!38, !39, !40}
|
||||
!39 = !DILocation(line: 5, column: 3, scope: !24)
|
||||
!40 = !DILocation(line: 6, column: 14, scope: !24)
|
||||
!41 = !DILocation(line: 11, column: 12, scope: !42, inlinedAt: !46)
|
||||
!42 = distinct !DISubprogram(name: "lee2", scope: !1, file: !1, line: 10, type: !9, isLocal: true, isDefinition: true, scopeLine: 10, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !43)
|
||||
!43 = !{!44, !45}
|
||||
!44 = !DILocalVariable(name: "a", arg: 1, scope: !42, file: !1, line: 10, type: !11)
|
||||
!45 = !DILocalVariable(name: "b", arg: 2, scope: !42, file: !1, line: 10, type: !11)
|
||||
!46 = distinct !DILocation(line: 24, column: 40, scope: !8)
|
||||
!47 = !DILocation(line: 10, column: 22, scope: !42, inlinedAt: !46)
|
||||
!48 = !DILocation(line: 10, column: 29, scope: !42, inlinedAt: !46)
|
||||
!49 = !DILocation(line: 11, column: 20, scope: !42, inlinedAt: !46)
|
||||
!50 = !DILocation(line: 11, column: 16, scope: !42, inlinedAt: !46)
|
||||
!51 = !DILocation(line: 24, column: 38, scope: !8)
|
||||
!52 = !DILocation(line: 24, column: 3, scope: !8)
|
@ -13,9 +13,11 @@
|
||||
; EPILOG: br i1 %niter.ncmp.1, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !dbg [[BODY_LOC:![0-9]+]]
|
||||
; EPILOG-NOT: br i1 %niter.ncmp.2, label %for.end.loopexit{{.*}}, label %for.body
|
||||
; EPILOG: for.body.epil.preheader:
|
||||
; EPILOG: br label %for.body.epil, !dbg [[EXIT_LOC:![0-9]+]]
|
||||
; EPILOG: br label %for.body.epil, !dbg [[BODY_LOC]]
|
||||
; EPILOG: for.body.epil:
|
||||
; EPILOG: br label %for.end.loopexit.epilog-lcssa, !dbg [[BODY_LOC:![0-9]+]]
|
||||
; EPILOG: br label %for.end.loopexit.epilog-lcssa, !dbg [[BODY_LOC]]
|
||||
; EPILOG: for.end.loopexit:
|
||||
; EPILOG: br label %for.end, !dbg [[EXIT_LOC:![0-9]+]]
|
||||
|
||||
; EPILOG-DAG: [[PH_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
|
||||
; EPILOG-DAG: [[BODY_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
|
||||
|
@ -1,48 +0,0 @@
|
||||
; RUN: opt < %s -memcpyopt -S | FileCheck %s
|
||||
; Test memcpy-memcpy dependencies across invoke edges.
|
||||
|
||||
; Test that memcpyopt works across the non-unwind edge of an invoke.
|
||||
|
||||
define hidden void @test_normal(i8* noalias %dst, i8* %src) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
|
||||
entry:
|
||||
%temp = alloca i8, i32 64
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false)
|
||||
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false)
|
||||
invoke void @invoke_me()
|
||||
to label %try.cont unwind label %lpad
|
||||
|
||||
lpad:
|
||||
landingpad { i8*, i32 }
|
||||
catch i8* null
|
||||
ret void
|
||||
|
||||
try.cont:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %temp, i64 64, i32 8, i1 false)
|
||||
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64, i32 8, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test that memcpyopt works across the unwind edge of an invoke.
|
||||
|
||||
define hidden void @test_unwind(i8* noalias %dst, i8* %src) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
|
||||
entry:
|
||||
%temp = alloca i8, i32 64
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false)
|
||||
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false)
|
||||
invoke void @invoke_me()
|
||||
to label %try.cont unwind label %lpad
|
||||
|
||||
lpad:
|
||||
landingpad { i8*, i32 }
|
||||
catch i8* null
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %temp, i64 64, i32 8, i1 false)
|
||||
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64, i32 8, i1 false)
|
||||
ret void
|
||||
|
||||
try.cont:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
|
||||
declare i32 @__gxx_personality_v0(...)
|
||||
declare void @invoke_me() readnone
|
@ -1,45 +0,0 @@
|
||||
; RUN: opt < %s -memcpyopt -S | FileCheck %s
|
||||
; Update cached non-local dependence information when merging stores into memset.
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
; Don't delete the memcpy in %if.then, even though it depends on an instruction
|
||||
; which will be deleted.
|
||||
|
||||
; CHECK-LABEL: @foo
|
||||
define void @foo(i1 %c, i8* %d, i8* %e, i8* %f) {
|
||||
entry:
|
||||
%tmp = alloca [50 x i8], align 8
|
||||
%tmp4 = bitcast [50 x i8]* %tmp to i8*
|
||||
%tmp1 = getelementptr inbounds i8, i8* %tmp4, i64 1
|
||||
call void @llvm.memset.p0i8.i64(i8* nonnull %d, i8 0, i64 10, i32 1, i1 false), !dbg !5
|
||||
store i8 0, i8* %tmp4, align 8, !dbg !5
|
||||
; CHECK: call void @llvm.memset.p0i8.i64(i8* nonnull %d, i8 0, i64 10, i32 1, i1 false), !dbg !5
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %tmp1, i8* nonnull %d, i64 10, i32 1, i1 false)
|
||||
br i1 %c, label %if.then, label %exit
|
||||
|
||||
if.then:
|
||||
; CHECK: if.then:
|
||||
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* nonnull %tmp4, i64 30, i32 8, i1 false)
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* nonnull %tmp4, i64 30, i32 8, i1 false)
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
|
||||
declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32, i1)
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!3, !4}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
|
||||
!1 = !DIFile(filename: "t.rs", directory: "/tmp")
|
||||
!2 = !{}
|
||||
!3 = !{i32 2, !"Dwarf Version", i32 4}
|
||||
!4 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!5 = !DILocation(line: 8, column: 5, scope: !6)
|
||||
!6 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 5, type: !7, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
|
||||
!7 = !DISubroutineType(types: !8)
|
||||
!8 = !{null}
|
@ -1,36 +0,0 @@
|
||||
; RUN: opt < %s -memcpyopt -S | FileCheck %s
|
||||
; Handle memcpy-memcpy dependencies of differing sizes correctly.
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
; Don't delete the second memcpy, even though there's an earlier
|
||||
; memcpy with a larger size from the same address.
|
||||
|
||||
; CHECK-LABEL: @foo
|
||||
define i32 @foo(i1 %z) {
|
||||
entry:
|
||||
%a = alloca [10 x i32]
|
||||
%s = alloca [10 x i32]
|
||||
%0 = bitcast [10 x i32]* %a to i8*
|
||||
%1 = bitcast [10 x i32]* %s to i8*
|
||||
call void @llvm.memset.p0i8.i64(i8* nonnull %1, i8 0, i64 40, i32 16, i1 false)
|
||||
%arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %a, i64 0, i64 0
|
||||
store i32 1, i32* %arrayidx
|
||||
%scevgep = getelementptr [10 x i32], [10 x i32]* %s, i64 0, i64 1
|
||||
%scevgep7 = bitcast i32* %scevgep to i8*
|
||||
br i1 %z, label %for.body3.lr.ph, label %for.inc7.1
|
||||
|
||||
for.body3.lr.ph: ; preds = %entry
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 17179869180, i32 4, i1 false)
|
||||
br label %for.inc7.1
|
||||
|
||||
for.inc7.1:
|
||||
; CHECK: for.inc7.1:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 4, i32 4, i1 false)
|
||||
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 4, i32 4, i1 false)
|
||||
%2 = load i32, i32* %arrayidx
|
||||
ret i32 %2
|
||||
}
|
||||
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
|
||||
declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32, i1)
|
@ -1,114 +0,0 @@
|
||||
; RUN: opt < %s -memcpyopt -S | FileCheck %s
|
||||
; Make sure memcpy-memcpy dependence is optimized across
|
||||
; basic blocks (conditional branches and invokes).
|
||||
|
||||
; Two-field i32 struct used as the element copied around by the tests below.
%struct.s = type { i32, i32 }
|
||||
|
||||
; Constant initializers that serve as the original memcpy sources in @foo and
; @baz respectively.
@s_foo = private unnamed_addr constant %struct.s { i32 1, i32 2 }, align 4
|
||||
@s_baz = private unnamed_addr constant %struct.s { i32 1, i32 2 }, align 4
|
||||
; External constant passed (bitcast to i8*) as the second argument of
; @__cxa_throw in @baz.
@i = external constant i8*
|
||||
|
||||
; External functions and the memcpy intrinsic referenced by the tests below.
declare void @qux()
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
|
||||
declare void @__cxa_throw(i8*, i8*, i8*)
|
||||
declare i32 @__gxx_personality_v0(...)
|
||||
declare i8* @__cxa_begin_catch(i8*)
|
||||
|
||||
; A simple partial redundancy. Test that the second memcpy is optimized
|
||||
; to copy directly from the original source rather than from the temporary.
|
||||
|
||||
; CHECK-LABEL: @wobble
|
||||
; @wobble: copies 64 bytes from %src into the 64-byte alloca %temp in the entry
; block, then branches on %some_condition. The "out" path calls @qux and is
; unreachable afterwards; the "more" path copies 64 bytes from %temp to %dst.
; The CHECK lines expect the first memcpy to stay as written and the second to
; read from %src instead of %temp.
define void @wobble(i8* noalias %dst, i8* %src, i1 %some_condition) {
|
||||
bb:
|
||||
%temp = alloca i8, i32 64
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false)
|
||||
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false)
|
||||
br i1 %some_condition, label %more, label %out
|
||||
|
||||
out:
|
||||
call void @qux()
|
||||
unreachable
|
||||
|
||||
more:
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %temp, i64 64, i32 8, i1 false)
|
||||
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64, i32 8, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
; A CFG triangle with a partial redundancy targeting an alloca. Test that the
|
||||
; memcpy inside the triangle is optimized to copy directly from the original
|
||||
; source rather than from the temporary.
|
||||
|
||||
; CHECK-LABEL: @foo
|
||||
; @foo: copies the 8-byte constant @s_foo into the alloca %s, then branches on
; %t3. Only block bb4 (taken when %t3 is true) copies %s into the alloca %t;
; the join block bb7 loads both i32 fields of %t and returns their sum. The
; CHECK lines expect the memcpy in bb4 to read from @s_foo instead of %s.
define i32 @foo(i1 %t3) {
|
||||
bb:
|
||||
%s = alloca %struct.s, align 4
|
||||
%t = alloca %struct.s, align 4
|
||||
%s1 = bitcast %struct.s* %s to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s1, i8* bitcast (%struct.s* @s_foo to i8*), i64 8, i32 4, i1 false)
|
||||
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s1, i8* bitcast (%struct.s* @s_foo to i8*), i64 8, i32 4, i1 false)
|
||||
br i1 %t3, label %bb4, label %bb7
|
||||
|
||||
bb4: ; preds = %bb
|
||||
%t5 = bitcast %struct.s* %t to i8*
|
||||
%s6 = bitcast %struct.s* %s to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t5, i8* %s6, i64 8, i32 4, i1 false)
|
||||
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t5, i8* bitcast (%struct.s* @s_foo to i8*), i64 8, i32 4, i1 false)
|
||||
br label %bb7
|
||||
|
||||
bb7: ; preds = %bb4, %bb
|
||||
%t8 = getelementptr %struct.s, %struct.s* %t, i32 0, i32 0
|
||||
%t9 = load i32, i32* %t8, align 4
|
||||
%t10 = getelementptr %struct.s, %struct.s* %t, i32 0, i32 1
|
||||
%t11 = load i32, i32* %t10, align 4
|
||||
%t12 = add i32 %t9, %t11
|
||||
ret i32 %t12
|
||||
}
|
||||
|
||||
; A CFG diamond with an invoke on one side, and a partially redundant memcpy
|
||||
; into an alloca on the other. Test that the memcpy inside the diamond is
|
||||
; optimized to copy directly from the original source rather than from the
|
||||
; temporary. This more complex test represents a relatively common usage
|
||||
; pattern.
|
||||
|
||||
; CHECK-LABEL: @baz
|
||||
; @baz: copies the 8-byte constant @s_baz into the alloca %s, then branches on
; %t5. When %t5 is true, bb6 invokes @__cxa_throw; its normal destination bb25
; is unreachable and its unwind destination bb9 (landingpad) continues through
; bb13 (@__cxa_begin_catch) to bb23. When %t5 is false, bb22 copies %s into the
; alloca %t and falls through to bb23, which loads both i32 fields of %t and
; returns their sum. The CHECK lines expect the memcpy in bb22 to read from
; @s_baz instead of %s.
define i32 @baz(i1 %t5) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
|
||||
bb:
|
||||
%s = alloca %struct.s, align 4
|
||||
%t = alloca %struct.s, align 4
|
||||
%s3 = bitcast %struct.s* %s to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s3, i8* bitcast (%struct.s* @s_baz to i8*), i64 8, i32 4, i1 false)
|
||||
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s3, i8* bitcast (%struct.s* @s_baz to i8*), i64 8, i32 4, i1 false)
|
||||
br i1 %t5, label %bb6, label %bb22
|
||||
|
||||
bb6: ; preds = %bb
|
||||
invoke void @__cxa_throw(i8* null, i8* bitcast (i8** @i to i8*), i8* null)
|
||||
to label %bb25 unwind label %bb9
|
||||
|
||||
bb9: ; preds = %bb6
|
||||
%t10 = landingpad { i8*, i32 }
|
||||
catch i8* null
|
||||
br label %bb13
|
||||
|
||||
bb13: ; preds = %bb9
|
||||
%t15 = call i8* @__cxa_begin_catch(i8* null)
|
||||
br label %bb23
|
||||
|
||||
bb22: ; preds = %bb
|
||||
%t23 = bitcast %struct.s* %t to i8*
|
||||
%s24 = bitcast %struct.s* %s to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t23, i8* %s24, i64 8, i32 4, i1 false)
|
||||
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t23, i8* bitcast (%struct.s* @s_baz to i8*), i64 8, i32 4, i1 false)
|
||||
br label %bb23
|
||||
|
||||
bb23: ; preds = %bb22, %bb13
|
||||
%t17 = getelementptr inbounds %struct.s, %struct.s* %t, i32 0, i32 0
|
||||
%t18 = load i32, i32* %t17, align 4
|
||||
%t19 = getelementptr inbounds %struct.s, %struct.s* %t, i32 0, i32 1
|
||||
%t20 = load i32, i32* %t19, align 4
|
||||
%t21 = add nsw i32 %t18, %t20
|
||||
ret i32 %t21
|
||||
|
||||
bb25: ; preds = %bb6
|
||||
unreachable
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user