Update llvm to trunk r256945.

Dimitry Andric 2016-01-06 20:19:13 +00:00
commit 4d0b32cd7f
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/projects/clang380-import/; revision=293265
133 changed files with 2992 additions and 1575 deletions

View File

@ -59,11 +59,6 @@ bool isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast = false);
/// \brief Tests if a value is a call or invoke to a library function that
/// allocates memory and never returns null (such as operator new).
bool isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast = false);
//===----------------------------------------------------------------------===//
// malloc Call Utility Functions.
//

View File

@ -97,7 +97,7 @@ class MachineInstr
// of memory operands required to be precise exceeds the maximum value of
// NumMemRefs - currently 256 - we remove the operands entirely. Note also
// that this is a non-owning reference to a shared copy on write buffer owned
// by the MachineFunction and created via MF.allocateMemRefsArray.
mmo_iterator MemRefs;
DebugLoc debugLoc; // Source line information.
@ -354,7 +354,7 @@ class MachineInstr
mmo_iterator memoperands_end() const { return MemRefs + NumMemRefs; }
/// Return true if we don't have any memory operands which describe the
/// memory access done by this instruction. If this is true, calling code
/// must be conservative.
bool memoperands_empty() const { return NumMemRefs == 0; }
iterator_range<mmo_iterator> memoperands() {
@ -774,7 +774,7 @@ class MachineInstr
bool isKill() const { return getOpcode() == TargetOpcode::KILL; }
bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; }
bool isInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM; }
bool isMSInlineAsm() const {
return getOpcode() == TargetOpcode::INLINEASM && getInlineAsmDialect();
}
bool isStackAligningInlineAsm() const;
@ -1180,11 +1180,26 @@ class MachineInstr
/// Assign this MachineInstr's memory reference descriptor list.
/// This does not transfer ownership.
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) {
MemRefs = NewMemRefs;
NumMemRefs = uint8_t(NewMemRefsEnd - NewMemRefs);
assert(NumMemRefs == NewMemRefsEnd - NewMemRefs && "Too many memrefs");
setMemRefs(std::make_pair(NewMemRefs, NewMemRefsEnd-NewMemRefs));
}
/// Assign this MachineInstr's memory reference descriptor list. First
/// element in the pair is the begin iterator/pointer to the array; the
/// second is the number of MemoryOperands. This does not transfer ownership
/// of the underlying memory.
void setMemRefs(std::pair<mmo_iterator, unsigned> NewMemRefs) {
MemRefs = NewMemRefs.first;
NumMemRefs = uint8_t(NewMemRefs.second);
assert(NumMemRefs == NewMemRefs.second &&
"Too many memrefs - must drop memory operands");
}
/// Return a set of memrefs (begin iterator, size) which conservatively
/// describe the memory behavior of both MachineInstrs. This is appropriate
/// for use when merging two MachineInstrs into one. This routine does not
/// modify the memrefs of this MachineInstr.
std::pair<mmo_iterator, unsigned> mergeMemRefsWith(const MachineInstr& Other);
/// Clear this MachineInstr's memory reference descriptor list. This resets
/// the memrefs to their most conservative state. This should be used only
/// as a last resort since it greatly pessimizes our knowledge of the memory
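The merge helper pairs with the new pair-taking setMemRefs overload. A minimal sketch, assuming two hypothetical instructions being combined into one (e.g. when forming a load pair):

#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Give a combined instruction memory operands that conservatively cover
// both original accesses. setMemRefs asserts if the merged count no
// longer fits the 8-bit NumMemRefs field.
static void copyMergedMemRefs(MachineInstr &Pair, MachineInstr &A,
                              const MachineInstr &B) {
  Pair.setMemRefs(A.mergeMemRefsWith(B));
}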

View File

@ -162,6 +162,11 @@ class MachineInstrBuilder {
return *this;
}
const MachineInstrBuilder &setMemRefs(std::pair<MachineInstr::mmo_iterator,
unsigned> MemOperandsRef) const {
MI->setMemRefs(MemOperandsRef);
return *this;
}
const MachineInstrBuilder &addOperand(const MachineOperand &MO) const {
MI->addOperand(*MF, MO);

View File

@ -178,7 +178,7 @@ class MachineOperandIteratorBase {
/// register.
bool FullyDefined;
/// Reg or ont of its aliases is read. The register may only be read
/// Reg or one of its aliases is read. The register may only be read
/// partially.
bool Read;
/// Reg or a super-register is read. The full register is read.

View File

@ -83,7 +83,9 @@ enum class ClrHandlerType { Catch, Finally, Fault, Filter };
struct ClrEHUnwindMapEntry {
MBBOrBasicBlock Handler;
uint32_t TypeToken;
int Parent;
int HandlerParentState; ///< Outer handler enclosing this entry's handler
int TryParentState; ///< Outer try region enclosing this entry's try region,
///< treating later catches on same try as "outer"
ClrHandlerType HandlerType;
};

View File

@ -310,6 +310,11 @@ class CallSiteBase {
CALLSITE_DELEGATE_GETTER(hasFnAttr(A));
}
/// \brief Return true if this function has the given attribute.
bool hasFnAttr(StringRef A) const {
CALLSITE_DELEGATE_GETTER(hasFnAttr(A));
}
/// \brief Return true if the call or the callee has the given attribute.
bool paramHasAttr(unsigned i, Attribute::AttrKind A) const {
CALLSITE_DELEGATE_GETTER(paramHasAttr(i, A));
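With the new StringRef overload, string function attributes can be queried through a CallSite just like enum attributes. A minimal sketch (the attribute name is illustrative):

#include "llvm/IR/CallSite.h"
using namespace llvm;

// True if the call site or its callee carries the named string attribute.
static bool disablesTailCalls(ImmutableCallSite CS) {
  return CS.hasFnAttr("disable-tail-calls");
}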

View File

@ -61,9 +61,13 @@ class IRBuilderBase {
MDNode *DefaultFPMathTag;
FastMathFlags FMF;
ArrayRef<OperandBundleDef> DefaultOperandBundles;
public:
IRBuilderBase(LLVMContext &context, MDNode *FPMathTag = nullptr)
: Context(context), DefaultFPMathTag(FPMathTag), FMF() {
IRBuilderBase(LLVMContext &context, MDNode *FPMathTag = nullptr,
ArrayRef<OperandBundleDef> OpBundles = None)
: Context(context), DefaultFPMathTag(FPMathTag), FMF(),
DefaultOperandBundles(OpBundles) {
ClearInsertionPoint();
}
@ -538,37 +542,44 @@ class IRBuilder : public IRBuilderBase, public Inserter {
public:
IRBuilder(LLVMContext &C, const T &F, Inserter I = Inserter(),
MDNode *FPMathTag = nullptr)
: IRBuilderBase(C, FPMathTag), Inserter(std::move(I)), Folder(F) {}
MDNode *FPMathTag = nullptr,
ArrayRef<OperandBundleDef> OpBundles = None)
: IRBuilderBase(C, FPMathTag, OpBundles), Inserter(std::move(I)),
Folder(F) {}
explicit IRBuilder(LLVMContext &C, MDNode *FPMathTag = nullptr)
: IRBuilderBase(C, FPMathTag), Folder() {
}
explicit IRBuilder(LLVMContext &C, MDNode *FPMathTag = nullptr,
ArrayRef<OperandBundleDef> OpBundles = None)
: IRBuilderBase(C, FPMathTag, OpBundles), Folder() {}
explicit IRBuilder(BasicBlock *TheBB, const T &F, MDNode *FPMathTag = nullptr)
: IRBuilderBase(TheBB->getContext(), FPMathTag), Folder(F) {
explicit IRBuilder(BasicBlock *TheBB, const T &F, MDNode *FPMathTag = nullptr,
ArrayRef<OperandBundleDef> OpBundles = None)
: IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder(F) {
SetInsertPoint(TheBB);
}
explicit IRBuilder(BasicBlock *TheBB, MDNode *FPMathTag = nullptr)
: IRBuilderBase(TheBB->getContext(), FPMathTag), Folder() {
explicit IRBuilder(BasicBlock *TheBB, MDNode *FPMathTag = nullptr,
ArrayRef<OperandBundleDef> OpBundles = None)
: IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder() {
SetInsertPoint(TheBB);
}
explicit IRBuilder(Instruction *IP, MDNode *FPMathTag = nullptr)
: IRBuilderBase(IP->getContext(), FPMathTag), Folder() {
explicit IRBuilder(Instruction *IP, MDNode *FPMathTag = nullptr,
ArrayRef<OperandBundleDef> OpBundles = None)
: IRBuilderBase(IP->getContext(), FPMathTag, OpBundles), Folder() {
SetInsertPoint(IP);
}
IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T& F,
MDNode *FPMathTag = nullptr)
: IRBuilderBase(TheBB->getContext(), FPMathTag), Folder(F) {
IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T &F,
MDNode *FPMathTag = nullptr,
ArrayRef<OperandBundleDef> OpBundles = None)
: IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder(F) {
SetInsertPoint(TheBB, IP);
}
IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP,
MDNode *FPMathTag = nullptr)
: IRBuilderBase(TheBB->getContext(), FPMathTag), Folder() {
MDNode *FPMathTag = nullptr,
ArrayRef<OperandBundleDef> OpBundles = None)
: IRBuilderBase(TheBB->getContext(), FPMathTag, OpBundles), Folder() {
SetInsertPoint(TheBB, IP);
}
@ -1529,8 +1540,11 @@ class IRBuilder : public IRBuilderBase, public Inserter {
CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args = None,
ArrayRef<OperandBundleDef> OpBundles = None,
const Twine &Name = "") {
return Insert(CallInst::Create(Callee, Args, OpBundles), Name);
const Twine &Name = "", MDNode *FPMathTag = nullptr) {
CallInst *CI = CallInst::Create(Callee, Args, OpBundles);
if (isa<FPMathOperator>(CI))
CI = cast<CallInst>(AddFPMathAttributes(CI, FPMathTag, FMF));
return Insert(CI, Name);
}
CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args,
@ -1543,7 +1557,7 @@ class IRBuilder : public IRBuilderBase, public Inserter {
CallInst *CreateCall(llvm::FunctionType *FTy, Value *Callee,
ArrayRef<Value *> Args, const Twine &Name = "",
MDNode *FPMathTag = nullptr) {
CallInst *CI = CallInst::Create(FTy, Callee, Args);
CallInst *CI = CallInst::Create(FTy, Callee, Args, DefaultOperandBundles);
if (isa<FPMathOperator>(CI))
CI = cast<CallInst>(AddFPMathAttributes(CI, FPMathTag, FMF));
return Insert(CI, Name);
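The net effect: a builder can carry default operand bundles that CreateCall attaches to every emitted call. A minimal sketch, assuming OperandBundleDef's (Tag, Inputs) constructor; note that IRBuilderBase stores only an ArrayRef, so the bundle definition must outlive the builder:

#include "llvm/IR/IRBuilder.h"
#include <vector>
using namespace llvm;

// Every call emitted through Builder carries a "deopt" bundle by default.
static CallInst *emitCallWithDeopt(BasicBlock *BB, FunctionType *FTy,
                                   Value *Callee,
                                   const std::vector<Value *> &DeoptState) {
  OperandBundleDef Deopt("deopt", DeoptState);
  IRBuilder<> Builder(BB, nullptr, makeArrayRef(Deopt));
  return Builder.CreateCall(FTy, Callee, None);
}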

View File

@ -3550,6 +3550,11 @@ class InvokeInst : public TerminatorInst,
return hasFnAttrImpl(A);
}
/// \brief Determine whether this call has the given attribute.
bool hasFnAttr(StringRef A) const {
return hasFnAttrImpl(A);
}
/// \brief Determine whether the call or the callee has the given attributes.
bool paramHasAttr(unsigned i, Attribute::AttrKind A) const;
@ -3734,7 +3739,19 @@ class InvokeInst : public TerminatorInst,
unsigned getNumSuccessorsV() const override;
void setSuccessorV(unsigned idx, BasicBlock *B) override;
bool hasFnAttrImpl(Attribute::AttrKind A) const;
template <typename AttrKind> bool hasFnAttrImpl(AttrKind A) const {
if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
return true;
// Operand bundles override attributes on the called function, but don't
// override attributes directly present on the invoke instruction.
if (isFnAttrDisallowedByOpBundle(A))
return false;
if (const Function *F = getCalledFunction())
return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A);
return false;
}
// Shadow Instruction::setInstructionSubclassData with a private forwarding
// method so that subclasses cannot accidentally use it.
@ -3966,6 +3983,8 @@ class CatchSwitchInst : public TerminatorInst {
/// point to the added handler.
void addHandler(BasicBlock *Dest);
void removeHandler(handler_iterator HI);
unsigned getNumSuccessors() const { return getNumOperands() - 1; }
BasicBlock *getSuccessor(unsigned Idx) const {
assert(Idx < getNumSuccessors() &&

View File

@ -32,6 +32,19 @@ let TargetPrefix = "x86" in {
[IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// FLAGS.
let TargetPrefix = "x86" in {
def int_x86_flags_read_u32 : GCCBuiltin<"__builtin_ia32_readeflags_u32">,
Intrinsic<[llvm_i32_ty], [], []>;
def int_x86_flags_read_u64 : GCCBuiltin<"__builtin_ia32_readeflags_u64">,
Intrinsic<[llvm_i64_ty], [], []>;
def int_x86_flags_write_u32 : GCCBuiltin<"__builtin_ia32_writeeflags_u32">,
Intrinsic<[], [llvm_i32_ty], []>;
def int_x86_flags_write_u64 : GCCBuiltin<"__builtin_ia32_writeeflags_u64">,
Intrinsic<[], [llvm_i64_ty], []>;
}
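These map onto the GCC-style builtins named above; a minimal C++ sketch of the source-level view (x86 only, 64-bit variant shown):

// Read the flags register, then write the same value back.
static unsigned long long saveRestoreEFlags() {
  unsigned long long Flags = __builtin_ia32_readeflags_u64();
  __builtin_ia32_writeeflags_u64(Flags);
  return Flags;
}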
//===----------------------------------------------------------------------===//
// Read Time Stamp Counter.
let TargetPrefix = "x86" in {
@ -2211,6 +2224,25 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_w_128 : GCCBuiltin<"__builtin_ia32_psraw128_mask">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_w_256 : GCCBuiltin<"__builtin_ia32_psraw256_mask">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_w_512 : GCCBuiltin<"__builtin_ia32_psraw512_mask">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_wi_128 : GCCBuiltin<"__builtin_ia32_psrawi128_mask">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_i8_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_wi_256 : GCCBuiltin<"__builtin_ia32_psrawi256_mask">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_i8_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_wi_512 : GCCBuiltin<"__builtin_ia32_psrawi512_mask">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_d : GCCBuiltin<"__builtin_ia32_pslld512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
@ -2229,6 +2261,69 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_mask_psra_q : GCCBuiltin<"__builtin_ia32_psraq512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_d_128 : GCCBuiltin<"__builtin_ia32_psrad128_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_d_256 : GCCBuiltin<"__builtin_ia32_psrad256_mask">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_di_128 : GCCBuiltin<"__builtin_ia32_psradi128_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_di_256 : GCCBuiltin<"__builtin_ia32_psradi256_mask">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_di_512 : GCCBuiltin<"__builtin_ia32_psradi512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_q_128 : GCCBuiltin<"__builtin_ia32_psraq128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_q_256 : GCCBuiltin<"__builtin_ia32_psraq256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_qi_128 : GCCBuiltin<"__builtin_ia32_psraqi128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_qi_256 : GCCBuiltin<"__builtin_ia32_psraqi256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psra_qi_512 : GCCBuiltin<"__builtin_ia32_psraqi512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_d_128: GCCBuiltin<"__builtin_ia32_psrld128_mask">,
Intrinsic<[llvm_v4i32_ty], [ llvm_v4i32_ty,
llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty ], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_d_256: GCCBuiltin<"__builtin_ia32_psrld256_mask">,
Intrinsic<[llvm_v8i32_ty], [ llvm_v8i32_ty,
llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty ], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_di_128: GCCBuiltin<"__builtin_ia32_psrldi128_mask">,
Intrinsic<[llvm_v4i32_ty], [ llvm_v4i32_ty,
llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty ], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_di_256: GCCBuiltin<"__builtin_ia32_psrldi256_mask">,
Intrinsic<[llvm_v8i32_ty], [ llvm_v8i32_ty,
llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty ], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_di_512: GCCBuiltin<"__builtin_ia32_psrldi512_mask">,
Intrinsic<[llvm_v16i32_ty], [ llvm_v16i32_ty,
llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty ], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_q_128: GCCBuiltin<"__builtin_ia32_psrlq128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_q_256: GCCBuiltin<"__builtin_ia32_psrlq256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_qi_128: GCCBuiltin<"__builtin_ia32_psrlqi128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_qi_256: GCCBuiltin<"__builtin_ia32_psrlqi256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrl_qi_512: GCCBuiltin<"__builtin_ia32_psrlqi512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Pack ops.
@ -2696,6 +2791,59 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_psrl_dq_512 : GCCBuiltin<"__builtin_ia32_psrldq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_psll_d_128 : GCCBuiltin<"__builtin_ia32_pslld128_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_d_256 : GCCBuiltin<"__builtin_ia32_pslld256_mask">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_di_128 : GCCBuiltin<"__builtin_ia32_pslldi128_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_di_256 : GCCBuiltin<"__builtin_ia32_pslldi256_mask">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_di_512 : GCCBuiltin<"__builtin_ia32_pslldi512_mask">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_q_128 : GCCBuiltin<"__builtin_ia32_psllq128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_q_256 : GCCBuiltin<"__builtin_ia32_psllq256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_qi_128 : GCCBuiltin<"__builtin_ia32_psllqi128_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_qi_256 : GCCBuiltin<"__builtin_ia32_psllqi256_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psll_qi_512 : GCCBuiltin<"__builtin_ia32_psllqi512_mask">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrlv16_hi : GCCBuiltin<"__builtin_ia32_psrlv16hi_mask">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrlv2_di : GCCBuiltin<"__builtin_ia32_psrlv2di_mask">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrlv32hi : GCCBuiltin<"__builtin_ia32_psrlv32hi_mask">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrlv4_di : GCCBuiltin<"__builtin_ia32_psrlv4di_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrlv4_si : GCCBuiltin<"__builtin_ia32_psrlv4si_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrlv8_hi : GCCBuiltin<"__builtin_ia32_psrlv8hi_mask">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psrlv8_si : GCCBuiltin<"__builtin_ia32_psrlv8si_mask">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Gather ops
@ -3919,9 +4067,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Support protection key
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_rdpkru : GCCBuiltin <"__builtin_ia32_rdpkru">,
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
Intrinsic<[llvm_i32_ty], [], []>;
def int_x86_wrpkru : GCCBuiltin<"__builtin_ia32_wrpkru">,
Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
Intrinsic<[], [llvm_i32_ty], []>;
}
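Dropping IntrNoMem models the fact that PKRU reads and writes must stay ordered with surrounding memory accesses rather than being freely moved or deleted. A minimal sketch using the corresponding builtins (requires a PKU-enabled target):

// Save the protection-key rights register, then restore it.
static unsigned savePKRU() {
  unsigned Old = __builtin_ia32_rdpkru();
  __builtin_ia32_wrpkru(Old);
  return Old;
}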
//===----------------------------------------------------------------------===//
// Half float conversion

View File

@ -283,14 +283,20 @@ class ReplaceableMetadataImpl {
LLVMContext &Context;
uint64_t NextIndex;
SmallDenseMap<void *, std::pair<OwnerTy, uint64_t>, 4> UseMap;
/// Flag that can be set to false if this metadata should not be
/// RAUW'ed, e.g. if it is used as the key of a map.
bool CanReplace;
public:
ReplaceableMetadataImpl(LLVMContext &Context)
: Context(Context), NextIndex(0) {}
: Context(Context), NextIndex(0), CanReplace(true) {}
~ReplaceableMetadataImpl() {
assert(UseMap.empty() && "Cannot destroy in-use replaceable metadata");
}
/// Set the CanReplace flag to the given value.
void setCanReplace(bool Replaceable) { CanReplace = Replaceable; }
LLVMContext &getContext() const { return Context; }
/// \brief Replace all uses of this with MD.
@ -901,14 +907,19 @@ class MDNode : public Metadata {
Context.getReplaceableUses()->replaceAllUsesWith(MD);
}
/// Set the CanReplace flag to the given value.
void setCanReplace(bool Replaceable) {
Context.getReplaceableUses()->setCanReplace(Replaceable);
}
/// \brief Resolve cycles.
///
/// Once all forward declarations have been resolved, force cycles to be
/// resolved. If \p MDMaterialized is true, then any temporary metadata
/// resolved. If \p AllowTemps is true, then any temporary metadata
/// is ignored, otherwise it asserts when encountering temporary metadata.
///
/// \pre No operands (or operands' operands, etc.) have \a isTemporary().
void resolveCycles(bool MDMaterialized = true);
void resolveCycles(bool AllowTemps = false);
/// \brief Replace a temporary node with a permanent one.
///
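A minimal sketch of the intended use, mirroring the bitcode-reader change later in this commit: pin a temporary node while it serves as a map key, so a stray RAUW asserts instead of silently changing the key's identity:

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

static TempMDTuple makePinnedTemp(LLVMContext &Ctx) {
  TempMDTuple Temp = MDTuple::getTemporary(Ctx, None);
  // Forbid RAUW while the node is used as a map key; the flag should be
  // set back to true before the node is resolved or replaced.
  Temp->setCanReplace(false);
  return Temp;
}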

View File

@ -22,6 +22,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Compiler.h"
@ -36,14 +37,13 @@ enum class StatepointFlags {
MaskAll = GCTransition ///< A bitmask that includes all valid flags.
};
class GCRelocateOperands;
class GCRelocateInst;
class ImmutableStatepoint;
bool isStatepoint(const ImmutableCallSite &CS);
bool isStatepoint(const Value *V);
bool isStatepoint(const Value &V);
bool isGCRelocate(const Value *V);
bool isGCRelocate(const ImmutableCallSite &CS);
bool isGCResult(const Value *V);
@ -247,7 +247,7 @@ class StatepointBase {
/// May contain several relocations for the same base/derived pair.
/// For example this could happen due to relocations on unwinding
/// path of invoke.
std::vector<GCRelocateOperands> getRelocates() const;
std::vector<const GCRelocateInst *> getRelocates() const;
/// Get the experimental_gc_result call tied to this statepoint. Can be
/// nullptr if there isn't a gc_result tied to this statepoint. Guaranteed to
@ -305,33 +305,27 @@ class Statepoint
explicit Statepoint(CallSite CS) : Base(CS) {}
};
/// Wraps a call to a gc.relocate and provides access to its operands.
/// TODO: This should likely be refactored to resemble the wrappers in
/// IntrinsicInst.h.
class GCRelocateOperands {
ImmutableCallSite RelocateCS;
/// This represents the gc.relocate intrinsic.
class GCRelocateInst : public IntrinsicInst {
public:
GCRelocateOperands(const User *U) : RelocateCS(U) { assert(isGCRelocate(U)); }
GCRelocateOperands(const Instruction *inst) : RelocateCS(inst) {
assert(isGCRelocate(inst));
static inline bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::experimental_gc_relocate;
}
static inline bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
GCRelocateOperands(CallSite CS) : RelocateCS(CS) { assert(isGCRelocate(CS)); }
/// Return true if this relocate is tied to the invoke statepoint.
/// This includes relocates which are on the unwinding path.
bool isTiedToInvoke() const {
const Value *Token = RelocateCS.getArgument(0);
const Value *Token = getArgOperand(0);
return isa<LandingPadInst>(Token) || isa<InvokeInst>(Token);
}
/// Get enclosed relocate intrinsic
ImmutableCallSite getUnderlyingCallSite() { return RelocateCS; }
/// The statepoint with which this gc.relocate is associated.
const Instruction *getStatepoint() {
const Value *Token = RelocateCS.getArgument(0);
const Instruction *getStatepoint() const {
const Value *Token = getArgOperand(0);
// This takes care both of relocates for call statepoints and relocates
// on normal path of invoke statepoint.
@ -354,22 +348,22 @@ class GCRelocateOperands {
/// The index into the associated statepoint's argument list
/// which contains the base pointer of the pointer whose
/// relocation this gc.relocate describes.
unsigned getBasePtrIndex() {
return cast<ConstantInt>(RelocateCS.getArgument(1))->getZExtValue();
unsigned getBasePtrIndex() const {
return cast<ConstantInt>(getArgOperand(1))->getZExtValue();
}
/// The index into the associated statepoint's argument list which
/// contains the pointer whose relocation this gc.relocate describes.
unsigned getDerivedPtrIndex() {
return cast<ConstantInt>(RelocateCS.getArgument(2))->getZExtValue();
unsigned getDerivedPtrIndex() const {
return cast<ConstantInt>(getArgOperand(2))->getZExtValue();
}
Value *getBasePtr() {
Value *getBasePtr() const {
ImmutableCallSite CS(getStatepoint());
return *(CS.arg_begin() + getBasePtrIndex());
}
Value *getDerivedPtr() {
Value *getDerivedPtr() const {
ImmutableCallSite CS(getStatepoint());
return *(CS.arg_begin() + getDerivedPtrIndex());
}
@ -377,11 +371,11 @@ class GCRelocateOperands {
template <typename FunTy, typename InstructionTy, typename ValueTy,
typename CallSiteTy>
std::vector<GCRelocateOperands>
std::vector<const GCRelocateInst *>
StatepointBase<FunTy, InstructionTy, ValueTy, CallSiteTy>::getRelocates()
const {
std::vector<GCRelocateOperands> Result;
std::vector<const GCRelocateInst *> Result;
CallSiteTy StatepointCS = getCallSite();
@ -389,8 +383,8 @@ StatepointBase<FunTy, InstructionTy, ValueTy, CallSiteTy>::getRelocates()
// gc_relocates ensures that we only get pairs which are actually relocated
// and used after the statepoint.
for (const User *U : getInstruction()->users())
if (isGCRelocate(U))
Result.push_back(GCRelocateOperands(U));
if (auto *Relocate = dyn_cast<GCRelocateInst>(U))
Result.push_back(Relocate);
if (!StatepointCS.isInvoke())
return Result;
@ -401,8 +395,8 @@ StatepointBase<FunTy, InstructionTy, ValueTy, CallSiteTy>::getRelocates()
// Search for gc relocates that are attached to this landingpad.
for (const User *LandingPadUser : LandingPad->users()) {
if (isGCRelocate(LandingPadUser))
Result.push_back(GCRelocateOperands(LandingPadUser));
if (auto *Relocate = dyn_cast<GCRelocateInst>(LandingPadUser))
Result.push_back(Relocate);
}
return Result;
}
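Since GCRelocateInst is now a genuine IntrinsicInst subclass, callers can use the ordinary cast machinery instead of isGCRelocate plus a wrapper. A minimal sketch:

#include "llvm/IR/Statepoint.h"
using namespace llvm;

// Count this statepoint's relocates that are tied to an invoke; relocates
// on the unwind path hang off the landingpad and are not visited here.
static unsigned countInvokeTiedRelocates(const Instruction &StatepointInst) {
  unsigned N = 0;
  for (const User *U : StatepointInst.users())
    if (auto *Relocate = dyn_cast<GCRelocateInst>(U))
      if (Relocate->isTiedToInvoke())
        ++N;
  return N;
}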

View File

@ -39,8 +39,8 @@ class FeatureBitset : public std::bitset<MAX_SUBTARGET_FEATURES> {
FeatureBitset(const bitset<MAX_SUBTARGET_FEATURES>& B) : bitset(B) {}
FeatureBitset(std::initializer_list<unsigned> Init) : bitset() {
for (auto I = Init.begin() , E = Init.end(); I != E; ++I)
set(*I);
for (auto I : Init)
set(I);
}
};
@ -59,6 +59,11 @@ struct SubtargetFeatureKV {
bool operator<(StringRef S) const {
return StringRef(Key) < S;
}
// Compare routine for std::is_sorted.
bool operator<(const SubtargetFeatureKV &Other) const {
return StringRef(Key) < StringRef(Other.Key);
}
};
//===----------------------------------------------------------------------===//
@ -98,14 +103,13 @@ class SubtargetFeatures {
/// Adding Features.
void AddFeature(StringRef String, bool Enable = true);
/// ToggleFeature - Toggle a feature and returns the newly updated feature
/// bits.
FeatureBitset ToggleFeature(FeatureBitset Bits, StringRef String,
ArrayRef<SubtargetFeatureKV> FeatureTable);
/// ToggleFeature - Toggle a feature and update the feature bits.
static void ToggleFeature(FeatureBitset &Bits, StringRef String,
ArrayRef<SubtargetFeatureKV> FeatureTable);
/// Apply the feature flag and return the newly updated feature bits.
FeatureBitset ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature,
ArrayRef<SubtargetFeatureKV> FeatureTable);
/// Apply the feature flag and update the feature bits.
static void ApplyFeatureFlag(FeatureBitset &Bits, StringRef Feature,
ArrayRef<SubtargetFeatureKV> FeatureTable);
/// Get feature bits of a CPU.
FeatureBitset getFeatureBits(StringRef CPU,

View File

@ -155,11 +155,36 @@ GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName);
GlobalVariable *createPGOFuncNameVar(Module &M,
GlobalValue::LinkageTypes Linkage,
StringRef FuncName);
/// Return the string initializer of the PGO name var \c NameVar.
StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar);
/// Given a PGO function name, remove the filename prefix and return
/// the original (static) function name.
StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName);
/// Given a vector of strings (function PGO names) \c NameStrs, the
/// method generates a combined string \c Result that is ready to be
/// serialized. The \c Result string is comprised of three fields:
/// The first field is the length of the uncompressed strings, and
/// the second field is the length of the zlib-compressed string.
/// Both fields are encoded in ULEB128. If \c doCompress is false, the
/// third field is the uncompressed strings; otherwise it is the
/// compressed string. When the string compression is off, the
/// second field will have value zero.
int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
bool doCompression, std::string &Result);
/// Produce \c Result string with the same format described above. The input
/// is a vector of PGO function name variables that are referenced.
int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
std::string &Result);
class InstrProfSymtab;
/// \c NameStrings is a string composed of one or more sub-strings encoded
/// in the format described above. The substrings are separated by 0 or
/// more zero bytes. This method decodes the string and populates the
/// \c Symtab.
int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab);
const std::error_category &instrprof_category();
enum class instrprof_error {
@ -235,6 +260,11 @@ class InstrProfSymtab {
/// This interface is used by the reader of the CoverageMapping test
/// format.
inline std::error_code create(StringRef D, uint64_t BaseAddr);
/// \c NameStrings is a string composed of one or more sub-strings
/// encoded in the format described above. The substrings are
/// separated by 0 or more zero bytes. This method decodes the
/// string and populates the \c Symtab.
inline std::error_code create(StringRef NameStrings);
/// Create InstrProfSymtab from a set of names iterable from
/// \p IterRange. This interface is used by IndexedProfReader.
template <typename NameIterRange> void create(const NameIterRange &IterRange);
@ -255,8 +285,8 @@ class InstrProfSymtab {
AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val));
}
AddrHashMap &getAddrHashMap() { return AddrToMD5Map; }
/// Return function's PGO name from the function name's symabol
/// address in the object file. If an error occurs, Return
/// Return function's PGO name from the function name's symbol
/// address in the object file. If an error occurs, return
/// an empty string.
StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize);
/// Return function's PGO name from the name's md5 hash value.
@ -270,6 +300,12 @@ std::error_code InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) {
return std::error_code();
}
std::error_code InstrProfSymtab::create(StringRef NameStrings) {
if (readPGOFuncNameStrings(NameStrings, *this))
return make_error_code(instrprof_error::malformed);
return std::error_code();
}
template <typename NameIterRange>
void InstrProfSymtab::create(const NameIterRange &IterRange) {
for (auto Name : IterRange)
@ -576,8 +612,14 @@ template <class IntPtrT> struct CovMapFunctionRecord {
#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};
LLVM_PACKED_END
// Per module coverage mapping data header, i.e. CoverageMapFileHeader
// documented above.
struct CovMapHeader {
#define COVMAP_HEADER(Type, LLVMType, Name, Init) Type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};
LLVM_PACKED_END
}
} // end namespace llvm
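A minimal round-trip sketch of the encoding and the InstrProfSymtab decoder declared above (compression disabled, error handling reduced to a boolean):

#include "llvm/ProfileData/InstrProf.h"
#include <string>
#include <vector>
using namespace llvm;

static bool roundTripFuncNames(const std::vector<std::string> &Names) {
  std::string Blob;
  // Encode: ULEB128 uncompressed length, ULEB128 compressed length (zero
  // here since compression is off), then the raw name strings.
  if (collectPGOFuncNameStrings(Names, /*doCompression=*/false, Blob))
    return false;
  InstrProfSymtab Symtab;
  return !Symtab.create(StringRef(Blob)); // decoding populates the symtab
}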

View File

@ -1,4 +1,4 @@
/*===-- InstrProfData.inc - instr profiling runtime structures -----------=== *\
/*===-- InstrProfData.inc - instr profiling runtime structures -*- C++ -*-=== *\
|*
|* The LLVM Compiler Infrastructure
|*
@ -167,6 +167,25 @@ COVMAP_FUNC_RECORD(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \
#undef COVMAP_FUNC_RECORD
/* COVMAP_FUNC_RECORD end. */
/* COVMAP_HEADER start */
/* Definition of member fields of coverage map header.
*/
#ifndef COVMAP_HEADER
#define COVMAP_HEADER(Type, LLVMType, Name, Initializer)
#else
#define INSTR_PROF_DATA_DEFINED
#endif
COVMAP_HEADER(uint32_t, Int32Ty, NRecords, \
llvm::ConstantInt::get(Int32Ty, FunctionRecords.size()))
COVMAP_HEADER(uint32_t, Int32Ty, FilenamesSize, \
llvm::ConstantInt::get(Int32Ty, FilenamesSize))
COVMAP_HEADER(uint32_t, Int32Ty, CoverageSize, \
llvm::ConstantInt::get(Int32Ty, CoverageMappingSize))
COVMAP_HEADER(uint32_t, Int32Ty, Version, \
llvm::ConstantInt::get(Int32Ty, CoverageMappingVersion1))
#undef COVMAP_HEADER
/* COVMAP_HEADER end. */
#ifdef INSTR_PROF_VALUE_PROF_DATA
#define INSTR_PROF_DATA_DEFINED

View File

@ -213,6 +213,7 @@ ARM_CPU_NAME("cortex-a53", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, true, AEK_CRC)
ARM_CPU_NAME("cortex-a57", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
ARM_CPU_NAME("cortex-a72", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
ARM_CPU_NAME("cyclone", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
ARM_CPU_NAME("exynos-m1", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC)
// Non-standard Arch names.
ARM_CPU_NAME("iwmmxt", AK_IWMMXT, FK_NONE, true, AEK_NONE)
ARM_CPU_NAME("xscale", AK_XSCALE, FK_NONE, true, AEK_NONE)

View File

@ -130,7 +130,7 @@ struct ProcessInfo {
/// Return true if the given arguments fit within system-specific
/// argument length limits.
bool argumentsFitWithinSystemLimits(ArrayRef<const char*> Args);
bool commandLineFitsWithinSystemLimits(StringRef Program, ArrayRef<const char*> Args);
/// File encoding options when writing contents that a non-UTF8 tool will
/// read (on Windows systems). For UNIX, we always use UTF-8.

View File

@ -305,7 +305,7 @@ class KeyValueNode final : public Node {
/// increment() which must set CurrentEntry to 0 to create an end iterator.
template <class BaseT, class ValueT>
class basic_collection_iterator
: public std::iterator<std::forward_iterator_tag, ValueT> {
: public std::iterator<std::input_iterator_tag, ValueT> {
public:
basic_collection_iterator() : Base(nullptr) {}
basic_collection_iterator(BaseT *B) : Base(B) {}
@ -326,11 +326,24 @@ class basic_collection_iterator
return Base->CurrentEntry;
}
/// Note on EqualityComparable:
///
/// The iterator is not re-entrant; it is meant to be used for parsing
/// YAML on demand. Once iteration has started, it can point to only one
/// entry at a time; hence Base.CurrentEntry and Other.Base.CurrentEntry
/// are equal iff Base and Other.Base are equal.
bool operator==(const basic_collection_iterator &Other) const {
if (Base && (Base == Other.Base)) {
assert((Base->CurrentEntry == Other.Base->CurrentEntry)
&& "Equal Bases expected to point to equal Entries");
}
return Base == Other.Base;
}
bool operator!=(const basic_collection_iterator &Other) const {
if (Base != Other.Base)
return true;
return (Base && Other.Base) &&
Base->CurrentEntry != Other.Base->CurrentEntry;
return !(Base == Other.Base);
}
basic_collection_iterator &operator++() {
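The downgrade to input_iterator_tag matches the actual semantics: a single on-demand pass over the stream, with equality meaningful only as described in the note above. A minimal parsing sketch:

#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
using namespace llvm;

// One forward pass over a top-level YAML sequence; re-iterating the same
// node is not supported (input-iterator semantics).
static unsigned countTopLevelItems(StringRef Input) {
  SourceMgr SM;
  yaml::Stream YS(Input, SM);
  unsigned N = 0;
  if (auto *Seq = dyn_cast_or_null<yaml::SequenceNode>(YS.begin()->getRoot()))
    for (yaml::Node &Item : *Seq) {
      (void)Item;
      ++N;
    }
  return N;
}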

View File

@ -232,7 +232,7 @@ class Init {
/// We could pack these a bit tighter by not having the IK_FirstXXXInit
/// and IK_LastXXXInit be their own values, but that would degrade
/// readability for really no benefit.
enum InitKind {
enum InitKind : uint8_t {
IK_BitInit,
IK_FirstTypedInit,
IK_BitsInit,
@ -256,6 +256,9 @@ class Init {
private:
const InitKind Kind;
protected:
uint8_t Opc; // Used by UnOpInit, BinOpInit, and TernOpInit
private:
Init(const Init &) = delete;
Init &operator=(const Init &) = delete;
virtual void anchor();
@ -264,7 +267,7 @@ class Init {
InitKind getKind() const { return Kind; }
protected:
explicit Init(InitKind K) : Kind(K) {}
explicit Init(InitKind K, uint8_t Opc = 0) : Kind(K), Opc(Opc) {}
public:
virtual ~Init() {}
@ -365,7 +368,8 @@ class TypedInit : public Init {
TypedInit &operator=(const TypedInit &Other) = delete;
protected:
explicit TypedInit(InitKind K, RecTy *T) : Init(K), Ty(T) {}
explicit TypedInit(InitKind K, RecTy *T, uint8_t Opc = 0)
: Init(K, Opc), Ty(T) {}
~TypedInit() override {
// If this is a DefInit we need to delete the RecordRecTy.
if (getKind() == IK_DefInit)
@ -650,7 +654,8 @@ class OpInit : public TypedInit {
OpInit &operator=(OpInit &Other) = delete;
protected:
explicit OpInit(InitKind K, RecTy *Type) : TypedInit(K, Type) {}
explicit OpInit(InitKind K, RecTy *Type, uint8_t Opc)
: TypedInit(K, Type, Opc) {}
public:
static bool classof(const Init *I) {
@ -677,14 +682,13 @@ class OpInit : public TypedInit {
///
class UnOpInit : public OpInit {
public:
enum UnaryOp { CAST, HEAD, TAIL, EMPTY };
enum UnaryOp : uint8_t { CAST, HEAD, TAIL, EMPTY };
private:
UnaryOp Opc;
Init *LHS;
UnOpInit(UnaryOp opc, Init *lhs, RecTy *Type)
: OpInit(IK_UnOpInit, Type), Opc(opc), LHS(lhs) {}
: OpInit(IK_UnOpInit, Type, opc), LHS(lhs) {}
UnOpInit(const UnOpInit &Other) = delete;
UnOpInit &operator=(const UnOpInit &Other) = delete;
@ -708,7 +712,7 @@ class UnOpInit : public OpInit {
return getOperand();
}
UnaryOp getOpcode() const { return Opc; }
UnaryOp getOpcode() const { return (UnaryOp)Opc; }
Init *getOperand() const { return LHS; }
// Fold - If possible, fold this to a simpler init. Return this if not
@ -724,14 +728,14 @@ class UnOpInit : public OpInit {
///
class BinOpInit : public OpInit {
public:
enum BinaryOp { ADD, AND, SHL, SRA, SRL, LISTCONCAT, STRCONCAT, CONCAT, EQ };
enum BinaryOp : uint8_t { ADD, AND, SHL, SRA, SRL, LISTCONCAT,
STRCONCAT, CONCAT, EQ };
private:
BinaryOp Opc;
Init *LHS, *RHS;
BinOpInit(BinaryOp opc, Init *lhs, Init *rhs, RecTy *Type) :
OpInit(IK_BinOpInit, Type), Opc(opc), LHS(lhs), RHS(rhs) {}
OpInit(IK_BinOpInit, Type, opc), LHS(lhs), RHS(rhs) {}
BinOpInit(const BinOpInit &Other) = delete;
BinOpInit &operator=(const BinOpInit &Other) = delete;
@ -759,7 +763,7 @@ class BinOpInit : public OpInit {
}
}
BinaryOp getOpcode() const { return Opc; }
BinaryOp getOpcode() const { return (BinaryOp)Opc; }
Init *getLHS() const { return LHS; }
Init *getRHS() const { return RHS; }
@ -776,15 +780,14 @@ class BinOpInit : public OpInit {
///
class TernOpInit : public OpInit {
public:
enum TernaryOp { SUBST, FOREACH, IF };
enum TernaryOp : uint8_t { SUBST, FOREACH, IF };
private:
TernaryOp Opc;
Init *LHS, *MHS, *RHS;
TernOpInit(TernaryOp opc, Init *lhs, Init *mhs, Init *rhs,
RecTy *Type) :
OpInit(IK_TernOpInit, Type), Opc(opc), LHS(lhs), MHS(mhs), RHS(rhs) {}
OpInit(IK_TernOpInit, Type, opc), LHS(lhs), MHS(mhs), RHS(rhs) {}
TernOpInit(const TernOpInit &Other) = delete;
TernOpInit &operator=(const TernOpInit &Other) = delete;
@ -815,7 +818,7 @@ class TernOpInit : public OpInit {
}
}
TernaryOp getOpcode() const { return Opc; }
TernaryOp getOpcode() const { return (TernaryOp)Opc; }
Init *getLHS() const { return LHS; }
Init *getMHS() const { return MHS; }
Init *getRHS() const { return RHS; }

View File

@ -936,6 +936,10 @@ class AsmParser {
// ShouldEmitMatchRegisterName - Set to false if the target needs a hand
// written register name matcher
bit ShouldEmitMatchRegisterName = 1;
// HasMnemonicFirst - Set to false if target instructions don't always
// start with a mnemonic as the first token.
bit HasMnemonicFirst = 1;
}
def DefaultAsmParser : AsmParser;

View File

@ -2269,6 +2269,12 @@ class TargetLowering : public TargetLoweringBase {
return false;
}
/// Return true if the MachineFunction contains a COPY which would imply
/// HasOpaqueSPAdjustment.
virtual bool hasCopyImplyingStackAdjustment(MachineFunction *MF) const {
return false;
}
/// Perform necessary initialization to handle a subset of CSRs explicitly
/// via copies. This function is called at the beginning of instruction
/// selection.

View File

@ -23,11 +23,13 @@
namespace llvm {
/// This optimization identifies DIV instructions that can be
/// This optimization identifies DIV instructions in a BB that can be
/// profitably bypassed and carried out with a shorter, faster divide.
bool bypassSlowDivision(Function &F,
Function::iterator &I,
const DenseMap<unsigned int, unsigned int> &BypassWidth);
///
/// This optimization may add basic blocks immediately after BB; for obvious
/// reasons, you shouldn't pass those blocks to bypassSlowDivision.
bool bypassSlowDivision(
BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidth);
} // End llvm namespace

View File

@ -16,6 +16,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
@ -39,6 +40,8 @@ struct LICMSafetyInfo {
bool MayThrow; // The current loop contains an instruction which
// may throw.
bool HeaderMayThrow; // Same as previous, but specific to loop header
// Used to update funclet bundle operands.
DenseMap<BasicBlock *, ColorVector> BlockColors;
LICMSafetyInfo() : MayThrow(false), HeaderMayThrow(false)
{}
};

View File

@ -543,7 +543,6 @@ static bool isMemsetPattern16(const Function *MS,
isa<IntegerType>(MemsetType->getParamType(2)))
return true;
}
return false;
}
@ -583,9 +582,6 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
if (F->onlyAccessesArgMemory())
Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
if (isMemsetPattern16(F, TLI))
Min = FMRB_OnlyAccessesArgumentPointees;
// Otherwise be conservative.
return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);
}
@ -599,22 +595,21 @@ ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
case Intrinsic::memset:
case Intrinsic::memcpy:
case Intrinsic::memmove:
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memory intrinsic");
return ArgIdx ? MRI_Ref : MRI_Mod;
// We don't currently have a writeonly attribute. All other properties
// of these intrinsics are nicely described via attributes in
// Intrinsics.td and handled generically below.
if (ArgIdx == 0)
return MRI_Mod;
}
// We can bound the aliasing properties of memset_pattern16 just as we can
// for memcpy/memset. This is particularly important because the
// LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
// whenever possible.
if (CS.getCalledFunction() &&
isMemsetPattern16(CS.getCalledFunction(), TLI)) {
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memset_pattern16");
return ArgIdx ? MRI_Ref : MRI_Mod;
}
// FIXME: Handle memset_pattern4 and memset_pattern8 also.
// whenever possible. Note that all but the missing writeonly attribute are
// handled via InferFunctionAttr.
if (CS.getCalledFunction() && isMemsetPattern16(CS.getCalledFunction(), TLI))
if (ArgIdx == 0)
return MRI_Mod;
if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly))
return MRI_Ref;

View File

@ -376,15 +376,6 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
} else {
return true; // Argument of an unknown call.
}
// If the Callee is not ReadNone, it may read the global,
// and if it is not ReadOnly, it may also write to it.
Function *CalleeF = CS.getCalledFunction();
if (!CalleeF->doesNotAccessMemory()) {
if (Readers)
Readers->insert(CalleeF);
if (Writers && !CalleeF->onlyReadsMemory())
Writers->insert(CalleeF);
}
}
} else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
@ -516,7 +507,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
if (F->isDeclaration()) {
// Try to get mod/ref behaviour from function attributes.
if (F->doesNotAccessMemory() || F->onlyAccessesInaccessibleMemory()) {
if (F->doesNotAccessMemory()) {
// Can't do better than that!
} else if (F->onlyReadsMemory()) {
FI.addModRefInfo(MRI_Ref);
@ -524,12 +515,6 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// This function might call back into the module and read a global -
// consider every global as possibly being read by this function.
FI.setMayReadAnyGlobal();
} else if (F->onlyAccessesArgMemory() ||
F->onlyAccessesInaccessibleMemOrArgMem()) {
// This function may only access (read/write) memory pointed to by its
// arguments. If this pointer is to a global, this escaping use of the
// pointer is captured in AnalyzeUsesOfPointer().
FI.addModRefInfo(MRI_ModRef);
} else {
FI.addModRefInfo(MRI_ModRef);
// Can't say anything useful unless it's an intrinsic - they don't

View File

@ -187,13 +187,6 @@ bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
return getAllocationData(V, AllocLike, TLI, LookThroughBitCast);
}
/// \brief Tests if a value is a call or invoke to a library function that
/// allocates memory and never returns null (such as operator new).
bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast) {
return getAllocationData(V, OpNewLike, TLI, LookThroughBitCast);
}
/// extractMallocCall - Returns the corresponding CallInst if the instruction
/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
/// ignore InvokeInst here.

View File

@ -477,7 +477,7 @@ MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom(
// being 42. A key property of this program however is that if either
// 1 or 4 were missing, there would be a race between the store of 42
// either the store of 0 or the load (making the whole progam racy).
// The paper mentionned above shows that the same property is respected
// The paper mentioned above shows that the same property is respected
// by every program that can detect any optimisation of that kind: either
// it is racy (undefined) or there is a release followed by an acquire
// between the pair of accesses under consideration.
@ -685,13 +685,13 @@ MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom(
return MemDepResult::getDef(Inst);
if (isInvariantLoad)
continue;
// Be conservative if the accessed pointer may alias the allocation.
if (AA->alias(Inst, AccessPtr) != NoAlias)
return MemDepResult::getClobber(Inst);
// If the allocation is not aliased and does not read memory (like
// strdup), it is safe to ignore.
if (isa<AllocaInst>(Inst) ||
isMallocLikeFn(Inst, TLI) || isCallocLikeFn(Inst, TLI))
// Be conservative if the accessed pointer may alias the allocation -
// fallback to the generic handling below.
if ((AA->alias(Inst, AccessPtr) == NoAlias) &&
// If the allocation is not aliased and does not read memory (like
// strdup), it is safe to ignore.
(isa<AllocaInst>(Inst) || isMallocLikeFn(Inst, TLI) ||
isCallocLikeFn(Inst, TLI)))
continue;
}
@ -792,10 +792,8 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
int Count = -1) {
if (Count == -1) Count = Cache.size();
if (Count == 0) return;
for (unsigned i = 1; i != unsigned(Count); ++i)
assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!");
assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
"Cache isn't sorted!");
}
#endif

View File

@ -52,14 +52,13 @@ static bool hasSinCosPiStret(const Triple &T) {
/// specified target triple. This should be carefully written so that a missing
/// target triple gets a sane set of defaults.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
const char *const *StandardNames) {
#ifndef NDEBUG
ArrayRef<const char *> StandardNames) {
// Verify that the StandardNames array is in alphabetical order.
for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) {
if (strcmp(StandardNames[F-1], StandardNames[F]) >= 0)
llvm_unreachable("TargetLibraryInfoImpl function names must be sorted");
}
#endif // !NDEBUG
assert(std::is_sorted(StandardNames.begin(), StandardNames.end(),
[](const char *LHS, const char *RHS) {
return strcmp(LHS, RHS) < 0;
}) &&
"TargetLibraryInfoImpl function names must be sorted");
if (T.getArch() == Triple::r600 ||
T.getArch() == Triple::amdgcn) {

View File

@ -1743,9 +1743,10 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
return false;
Value *X = nullptr, *Y = nullptr;
// A shift of a power of two is a power of two or zero.
// A shift left or a logical shift right of a power of two is a power of two
// or zero.
if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
match(V, m_Shr(m_Value(X), m_Value()))))
match(V, m_LShr(m_Value(X), m_Value()))))
return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL);
if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
@ -2829,7 +2830,12 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
const DataLayout &DL) {
unsigned BitWidth = DL.getPointerTypeSizeInBits(Ptr->getType());
APInt ByteOffset(BitWidth, 0);
while (1) {
// We walk up the defs but use a visited set to handle unreachable code. In
// that case, we stop after accumulating the cycle once (not that it
// matters).
SmallPtrSet<Value *, 16> Visited;
while (Visited.insert(Ptr).second) {
if (Ptr->getType()->isVectorTy())
break;
@ -3268,12 +3274,9 @@ static bool isDereferenceableAndAlignedPointer(
}
// For gc.relocate, look through relocations
if (const IntrinsicInst *I = dyn_cast<IntrinsicInst>(V))
if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) {
GCRelocateOperands RelocateInst(I);
return isDereferenceableAndAlignedPointer(
RelocateInst.getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited);
}
if (const GCRelocateInst *RelocateInst = dyn_cast<GCRelocateInst>(V))
return isDereferenceableAndAlignedPointer(
RelocateInst->getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited);
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, DL,
@ -3474,10 +3477,6 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
if (CS.isReturnNonNull())
return true;
// operator new never returns null.
if (isOperatorNewLikeFn(V, TLI, /*LookThroughBitCast=*/true))
return true;
return false;
}

View File

@ -3071,7 +3071,12 @@ void BitcodeReader::saveMetadataList(
for (unsigned ID = 0; ID < MetadataList.size(); ++ID) {
Metadata *MD = MetadataList[ID];
auto *N = dyn_cast_or_null<MDNode>(MD);
assert((!N || (N->isResolved() || N->isTemporary())) &&
"Found non-resolved non-temp MDNode while saving metadata");
// Save all values if !OnlyTempMD, otherwise just the temporary metadata.
// Note that in the !OnlyTempMD case we need to save all Metadata, not
// just MDNode, as we may have references to other types of module-level
// metadata (e.g. ValueAsMetadata) from instructions.
if (!OnlyTempMD || (N && N->isTemporary())) {
// Will call this after materializing each function, in order to
// handle remapping of the function's instructions/metadata.
@ -3080,6 +3085,11 @@ void BitcodeReader::saveMetadataList(
assert(MetadataToIDs[MD] == ID && "Inconsistent metadata value id");
continue;
}
if (N && N->isTemporary())
// Ensure that we assert if someone tries to RAUW this temporary
// metadata while it is the key of a map. The flag will be set back
// to true when the saved metadata list is destroyed.
N->setCanReplace(false);
MetadataToIDs[MD] = ID;
}
}

View File

@ -976,32 +976,32 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
}
}
static int getRank(const WinEHFuncInfo &FuncInfo, int State) {
static int getTryRank(const WinEHFuncInfo &FuncInfo, int State) {
int Rank = 0;
while (State != -1) {
++Rank;
State = FuncInfo.ClrEHUnwindMap[State].Parent;
State = FuncInfo.ClrEHUnwindMap[State].TryParentState;
}
return Rank;
}
static int getAncestor(const WinEHFuncInfo &FuncInfo, int Left, int Right) {
int LeftRank = getRank(FuncInfo, Left);
int RightRank = getRank(FuncInfo, Right);
static int getTryAncestor(const WinEHFuncInfo &FuncInfo, int Left, int Right) {
int LeftRank = getTryRank(FuncInfo, Left);
int RightRank = getTryRank(FuncInfo, Right);
while (LeftRank < RightRank) {
Right = FuncInfo.ClrEHUnwindMap[Right].Parent;
Right = FuncInfo.ClrEHUnwindMap[Right].TryParentState;
--RightRank;
}
while (RightRank < LeftRank) {
Left = FuncInfo.ClrEHUnwindMap[Left].Parent;
Left = FuncInfo.ClrEHUnwindMap[Left].TryParentState;
--LeftRank;
}
while (Left != Right) {
Left = FuncInfo.ClrEHUnwindMap[Left].Parent;
Right = FuncInfo.ClrEHUnwindMap[Right].Parent;
Left = FuncInfo.ClrEHUnwindMap[Left].TryParentState;
Right = FuncInfo.ClrEHUnwindMap[Right].TryParentState;
}
return Left;
@ -1035,9 +1035,9 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
FuncInfo.ClrEHUnwindMap[State].Handler.get<MachineBasicBlock *>();
HandlerStates[HandlerBlock] = State;
// Use this loop through all handlers to verify our assumption (used in
// the MinEnclosingState computation) that ancestors have lower state
// numbers than their descendants.
assert(FuncInfo.ClrEHUnwindMap[State].Parent < State &&
// the MinEnclosingState computation) that enclosing funclets have lower
// state numbers than their enclosed funclets.
assert(FuncInfo.ClrEHUnwindMap[State].HandlerParentState < State &&
"ill-formed state numbering");
}
// Map the main function to the NullState.
@ -1070,7 +1070,6 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
SmallVector<int, 4> MinClauseMap((size_t)NumStates, NumStates);
// Visit the root function and each funclet.
for (MachineFunction::const_iterator FuncletStart = MF->begin(),
FuncletEnd = MF->begin(),
End = MF->end();
@ -1100,17 +1099,18 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
for (const auto &StateChange :
InvokeStateChangeIterator::range(FuncInfo, FuncletStart, FuncletEnd)) {
// Close any try regions we're not still under
int AncestorState =
getAncestor(FuncInfo, CurrentState, StateChange.NewState);
while (CurrentState != AncestorState) {
assert(CurrentState != NullState && "Failed to find ancestor!");
int StillPendingState =
getTryAncestor(FuncInfo, CurrentState, StateChange.NewState);
while (CurrentState != StillPendingState) {
assert(CurrentState != NullState &&
"Failed to find still-pending state!");
// Close the pending clause
Clauses.push_back({CurrentStartLabel, StateChange.PreviousEndLabel,
CurrentState, FuncletState});
// Now the parent handler is current
CurrentState = FuncInfo.ClrEHUnwindMap[CurrentState].Parent;
// Now the next-outer try region is current
CurrentState = FuncInfo.ClrEHUnwindMap[CurrentState].TryParentState;
// Pop the new start label from the handler stack if we've exited all
// descendants of the corresponding handler.
// inner try regions of the corresponding try region.
if (HandlerStack.back().second == CurrentState)
CurrentStartLabel = HandlerStack.pop_back_val().first;
}
@ -1121,7 +1121,8 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
// it.
for (int EnteredState = StateChange.NewState;
EnteredState != CurrentState;
EnteredState = FuncInfo.ClrEHUnwindMap[EnteredState].Parent) {
EnteredState =
FuncInfo.ClrEHUnwindMap[EnteredState].TryParentState) {
int &MinEnclosingState = MinClauseMap[EnteredState];
if (FuncletState < MinEnclosingState)
MinEnclosingState = FuncletState;

View File

@ -225,8 +225,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
const DenseMap<unsigned int, unsigned int> &BypassWidths =
TLI->getBypassSlowDivWidths();
for (Function::iterator I = F.begin(); I != F.end(); I++)
EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
BasicBlock* BB = &*F.begin();
while (BB != nullptr) {
// bypassSlowDivision may create new BBs, but we don't want to reapply the
// optimization to those blocks.
BasicBlock* Next = BB->getNextNode();
EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
BB = Next;
}
}
// Eliminate blocks that contain only PHI nodes and an
@ -526,19 +532,17 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
// Computes a map of base pointer relocation instructions to corresponding
// derived pointer relocation instructions given a vector of all relocate calls
static void computeBaseDerivedRelocateMap(
const SmallVectorImpl<User *> &AllRelocateCalls,
DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> &
RelocateInstMap) {
const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
&RelocateInstMap) {
// Collect information in two maps: one primarily for locating the base object
// while filling the second map; the second map is the final structure holding
// a mapping between Base and corresponding Derived relocate calls
DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateIdxMap;
for (auto &U : AllRelocateCalls) {
GCRelocateOperands ThisRelocate(U);
IntrinsicInst *I = cast<IntrinsicInst>(U);
auto K = std::make_pair(ThisRelocate.getBasePtrIndex(),
ThisRelocate.getDerivedPtrIndex());
RelocateIdxMap.insert(std::make_pair(K, I));
DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
for (auto *ThisRelocate : AllRelocateCalls) {
auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
ThisRelocate->getDerivedPtrIndex());
RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
}
for (auto &Item : RelocateIdxMap) {
std::pair<unsigned, unsigned> Key = Item.first;
@ -546,7 +550,7 @@ static void computeBaseDerivedRelocateMap(
// Base relocation: nothing to insert
continue;
IntrinsicInst *I = Item.second;
GCRelocateInst *I = Item.second;
auto BaseKey = std::make_pair(Key.first, Key.first);
// We're iterating over RelocateIdxMap so we cannot modify it.
@ -579,16 +583,13 @@ static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
// replace, computes a replacement, and applies it.
static bool
simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
const SmallVectorImpl<IntrinsicInst *> &Targets) {
simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
const SmallVectorImpl<GCRelocateInst *> &Targets) {
bool MadeChange = false;
for (auto &ToReplace : Targets) {
GCRelocateOperands MasterRelocate(RelocatedBase);
GCRelocateOperands ThisRelocate(ToReplace);
assert(ThisRelocate.getBasePtrIndex() == MasterRelocate.getBasePtrIndex() &&
for (GCRelocateInst *ToReplace : Targets) {
assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
"Not relocating a derived object of the original base object");
if (ThisRelocate.getBasePtrIndex() == ThisRelocate.getDerivedPtrIndex()) {
if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
// A duplicate relocate call. TODO: coalesce duplicates.
continue;
}
@ -601,8 +602,8 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
continue;
}
Value *Base = ThisRelocate.getBasePtr();
auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.getDerivedPtr());
Value *Base = ToReplace->getBasePtr();
auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
if (!Derived || Derived->getPointerOperand() != Base)
continue;
@ -680,12 +681,12 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
// %val = load %ptr'
bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
bool MadeChange = false;
SmallVector<User *, 2> AllRelocateCalls;
SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
for (auto *U : I.users())
if (isGCRelocate(dyn_cast<Instruction>(U)))
if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
// Collect all the relocate calls associated with a statepoint
AllRelocateCalls.push_back(U);
AllRelocateCalls.push_back(Relocate);
// We need at least one base pointer relocation + one derived pointer
// relocation to mangle
@ -694,7 +695,7 @@ bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
// RelocateInstMap is a mapping from the base relocate instruction to the
// corresponding derived relocate instructions
DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateInstMap;
DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
if (RelocateInstMap.empty())
return false;

View File

@ -122,8 +122,7 @@ INITIALIZE_PASS_END(MachineCSE, "machine-cse",
bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
MachineBasicBlock *MBB) {
bool Changed = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
for (MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isUse())
continue;
unsigned Reg = MO.getReg();
@ -186,8 +185,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
return true;
bool SeenDef = false;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = I->getOperand(i);
for (const MachineOperand &MO : I->operands()) {
if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
SeenDef = true;
if (!MO.isReg() || !MO.getReg())
@ -220,8 +218,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
SmallVectorImpl<unsigned> &PhysDefs,
bool &PhysUseDef) const{
// First, add all uses to PhysRefs.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || MO.isDef())
continue;
unsigned Reg = MO.getReg();
@ -239,8 +236,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
// (which currently contains only uses), set the PhysUseDef flag.
PhysUseDef = false;
MachineBasicBlock::const_iterator I = MI; I = std::next(I);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
unsigned Reg = MO.getReg();
@ -311,8 +307,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
if (I == E)
return true;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = I->getOperand(i);
for (const MachineOperand &MO : I->operands()) {
// RegMasks go on instructions like calls that clobber lots of physregs.
// Don't attempt to CSE across such an instruction.
if (MO.isRegMask())
@ -398,8 +393,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
// Heuristics #2: If the expression doesn't use a vr and the only uses
// of the redundant computation are copies, do not cse.
bool HasVRegUse = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
for (const MachineOperand &MO : MI->operands()) {
if (MO.isReg() && MO.isUse() &&
TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
HasVRegUse = true;
@ -580,9 +574,9 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Actually perform the elimination.
if (DoCSE) {
for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
unsigned OldReg = CSEPairs[i].first;
unsigned NewReg = CSEPairs[i].second;
for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
unsigned OldReg = CSEPair.first;
unsigned NewReg = CSEPair.second;
// OldReg may have been unused but is used now, clear the Dead flag
MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
assert(Def != nullptr && "CSEd register has no unique definition?");
@ -594,8 +588,8 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Go through implicit defs of CSMI and MI, if a def is not dead at MI,
// we should make sure it is not dead at CSMI.
for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);
for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
// Go through implicit defs of CSMI and MI, and clear the kill flags on
// their uses in all the instructions between CSMI and MI.
@ -685,18 +679,14 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
Node = WorkList.pop_back_val();
Scopes.push_back(Node);
const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
unsigned NumChildren = Children.size();
OpenChildren[Node] = NumChildren;
for (unsigned i = 0; i != NumChildren; ++i) {
MachineDomTreeNode *Child = Children[i];
OpenChildren[Node] = Children.size();
for (MachineDomTreeNode *Child : Children)
WorkList.push_back(Child);
}
} while (!WorkList.empty());
// Now perform CSE.
bool Changed = false;
for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
MachineDomTreeNode *Node = Scopes[i];
for (MachineDomTreeNode *Node : Scopes) {
MachineBasicBlock *MBB = Node->getBlock();
EnterScope(MBB);
Changed |= ProcessBlock(MBB);

View File

@ -866,6 +866,27 @@ void MachineInstr::addMemOperand(MachineFunction &MF,
setMemRefs(NewMemRefs, NewMemRefs + NewNum);
}
std::pair<MachineInstr::mmo_iterator, unsigned>
MachineInstr::mergeMemRefsWith(const MachineInstr& Other) {
// TODO: If we end up with too many memory operands, return the empty
// conservative set rather than failing asserts.
// TODO: consider uniquing elements within the operand lists to reduce
// space usage and fall back to conservative information less often.
size_t CombinedNumMemRefs = (memoperands_end() - memoperands_begin())
+ (Other.memoperands_end() - Other.memoperands_begin());
MachineFunction *MF = getParent()->getParent();
mmo_iterator MemBegin = MF->allocateMemRefsArray(CombinedNumMemRefs);
mmo_iterator MemEnd = std::copy(memoperands_begin(), memoperands_end(),
MemBegin);
MemEnd = std::copy(Other.memoperands_begin(), Other.memoperands_end(),
MemEnd);
assert(MemEnd - MemBegin == (ptrdiff_t)CombinedNumMemRefs &&
"missing memrefs");
return std::make_pair(MemBegin, CombinedNumMemRefs);
}
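A minimal usage sketch of the new helper (illustrative, not from this patch; Wide, NarrowA, and NarrowB are hypothetical):

  // Give a combined instruction the union of both source memop lists.
  auto Merged = NarrowA->mergeMemRefsWith(*NarrowB);
  Wide->setMemRefs(Merged.first, Merged.first + Merged.second);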
bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
assert(!isBundledWithPred() && "Must be called on bundle header");
for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) {
@ -1738,7 +1759,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
bool HaveSemi = false;
const unsigned PrintableFlags = FrameSetup | FrameDestroy;
if (Flags & PrintableFlags) {
if (!HaveSemi) OS << ";"; HaveSemi = true;
if (!HaveSemi) {
OS << ";";
HaveSemi = true;
}
OS << " flags: ";
if (Flags & FrameSetup)
@ -1749,7 +1773,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
if (!memoperands_empty()) {
if (!HaveSemi) OS << ";"; HaveSemi = true;
if (!HaveSemi) {
OS << ";";
HaveSemi = true;
}
OS << " mem:";
for (mmo_iterator i = memoperands_begin(), e = memoperands_end();
@ -1762,7 +1789,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
// Print the regclass of any virtual registers encountered.
if (MRI && !VirtRegs.empty()) {
if (!HaveSemi) OS << ";"; HaveSemi = true;
if (!HaveSemi) {
OS << ";";
HaveSemi = true;
}
for (unsigned i = 0; i != VirtRegs.size(); ++i) {
const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
OS << " " << TRI->getRegClassName(RC)
@ -1781,7 +1811,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
// Print debug location information.
if (isDebugValue() && getOperand(e - 2).isMetadata()) {
if (!HaveSemi) OS << ";";
if (!HaveSemi)
OS << ";";
auto *DV = cast<DILocalVariable>(getOperand(e - 2).getMetadata());
OS << " line no:" << DV->getLine();
if (auto *InlinedAt = debugLoc->getInlinedAt()) {
@ -1795,7 +1826,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (isIndirectDebugValue())
OS << " indirect";
} else if (debugLoc && MF) {
if (!HaveSemi) OS << ";";
if (!HaveSemi)
OS << ";";
OS << " dbg:";
debugLoc.print(OS);
}

View File

@ -315,7 +315,7 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
if (!TRI->regsOverlap(MOReg, Reg))
continue;
bool Covered = TRI->isSuperRegisterEq(MOReg, Reg);
bool Covered = TRI->isSuperRegisterEq(Reg, MOReg);
if (MO.readsReg()) {
PRI.Read = true;
if (Covered) {

View File

@ -97,9 +97,8 @@ void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) {
unsigned Weight = PSetI.getWeight();
for (; PSetI.isValid(); ++PSetI) {
CurrSetPressure[*PSetI] += Weight;
if (CurrSetPressure[*PSetI] > P.MaxSetPressure[*PSetI]) {
P.MaxSetPressure[*PSetI] = CurrSetPressure[*PSetI];
}
P.MaxSetPressure[*PSetI] =
std::max(P.MaxSetPressure[*PSetI], CurrSetPressure[*PSetI]);
}
}
}

View File

@ -6843,9 +6843,13 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
uint64_t PtrOff = ShAmt / 8;
unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
SDLoc DL(LN0);
// The original load itself didn't wrap, so an offset within it doesn't.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
PtrType, LN0->getBasePtr(),
DAG.getConstant(PtrOff, DL, PtrType));
DAG.getConstant(PtrOff, DL, PtrType),
&Flags);
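// Worked example (illustrative, not from this patch): narrowing an i64 load
// to the i32 half at byte offset 4 computes NewPtr = Base + 4. The original
// load already accessed bytes Base through Base + 7 without wrapping, so
// Base + 4 cannot wrap either; that is what the NUW flag records.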
AddToWorklist(NewPtr.getNode());
SDValue Load;

View File

@ -2843,6 +2843,43 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
return (AZero | BZero).isAllOnesValue();
}
static SDValue FoldCONCAT_VECTORS(SDLoc DL, EVT VT, ArrayRef<SDValue> Ops,
llvm::SelectionDAG &DAG) {
if (Ops.size() == 1)
return Ops[0];
// Concat of UNDEFs is UNDEF.
if (std::all_of(Ops.begin(), Ops.end(),
[](SDValue Op) { return Op.isUndef(); }))
return DAG.getUNDEF(VT);
// A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified
// to one big BUILD_VECTOR.
// FIXME: Add support for UNDEF and SCALAR_TO_VECTOR as well.
if (!std::all_of(Ops.begin(), Ops.end(), [](SDValue Op) {
return Op.getOpcode() == ISD::BUILD_VECTOR;
}))
return SDValue();
EVT SVT = VT.getScalarType();
SmallVector<SDValue, 16> Elts;
for (SDValue Op : Ops)
Elts.append(Op->op_begin(), Op->op_end());
// BUILD_VECTOR requires all inputs to be of the same type, find the
// maximum type and extend them all.
for (SDValue Op : Elts)
SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
if (SVT.bitsGT(VT.getScalarType()))
for (SDValue &Op : Elts)
Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT)
? DAG.getZExtOrTrunc(Op, DL, SVT)
: DAG.getSExtOrTrunc(Op, DL, SVT);
return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
}
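A hedged illustration of what the helper above folds (values and types invented for the example):

  // t1: v2i16 = BUILD_VECTOR a, b      t2: v2i16 = BUILD_VECTOR c, d
  // concat_vectors t1, t2  ->  v4i16 = BUILD_VECTOR a, b, c, d
  // If the scalar types differ, each element is first zero- or sign-extended
  // to the widest scalar type, as the loop above does.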
/// getNode - Gets or creates the specified node.
///
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) {
@ -3426,34 +3463,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
if (N2.getOpcode() == ISD::EntryToken) return N1;
if (N1 == N2) return N1;
break;
case ISD::CONCAT_VECTORS:
// Concat of UNDEFs is UNDEF.
if (N1.getOpcode() == ISD::UNDEF &&
N2.getOpcode() == ISD::UNDEF)
return getUNDEF(VT);
// A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
// one big BUILD_VECTOR.
if (N1.getOpcode() == ISD::BUILD_VECTOR &&
N2.getOpcode() == ISD::BUILD_VECTOR) {
SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
N1.getNode()->op_end());
Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
// BUILD_VECTOR requires all inputs to be of the same type, find the
// maximum type and extend them all.
EVT SVT = VT.getScalarType();
for (SDValue Op : Elts)
SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
if (SVT.bitsGT(VT.getScalarType()))
for (SDValue &Op : Elts)
Op = TLI->isZExtFree(Op.getValueType(), SVT)
? getZExtOrTrunc(Op, DL, SVT)
: getSExtOrTrunc(Op, DL, SVT);
return getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
}
case ISD::CONCAT_VECTORS: {
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
SDValue Ops[] = {N1, N2};
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
}
case ISD::AND:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
@ -3911,19 +3927,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
}
break;
}
case ISD::CONCAT_VECTORS:
// A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
// one big BUILD_VECTOR.
if (N1.getOpcode() == ISD::BUILD_VECTOR &&
N2.getOpcode() == ISD::BUILD_VECTOR &&
N3.getOpcode() == ISD::BUILD_VECTOR) {
SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
N1.getNode()->op_end());
Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end());
return getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
}
case ISD::CONCAT_VECTORS: {
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
SDValue Ops[] = {N1, N2, N3};
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
}
case ISD::SETCC: {
// Use FoldSetCC to simplify SETCC's.
if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
@ -5462,6 +5472,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
switch (Opcode) {
default: break;
case ISD::CONCAT_VECTORS: {
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
}
case ISD::SELECT_CC: {
assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
assert(Ops[0].getValueType() == Ops[1].getValueType() &&

View File

@ -1329,12 +1329,18 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
SmallVector<SDValue, 4> Chains(NumValues);
for (unsigned i = 0; i != NumValues; ++i) {
SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(),
RetPtr.getValueType(), RetPtr,
DAG.getIntPtrConstant(Offsets[i],
getCurSDLoc()));
getCurSDLoc()),
&Flags);
Chains[i] =
DAG.getStore(Chain, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + i),
@ -2994,8 +3000,15 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (Field) {
// N = N + Offset
uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
// In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
SDNodeFlags Flags;
if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
Flags.setNoUnsignedWrap(true);
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
DAG.getConstant(Offset, dl, N.getValueType()));
DAG.getConstant(Offset, dl, N.getValueType()), &Flags);
}
Ty = StTy->getElementType(Field);
@ -3020,7 +3033,14 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
SDValue OffsVal = VectorWidth ?
DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) :
DAG.getConstant(Offs, dl, PtrTy);
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal);
// In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
SDNodeFlags Flags;
if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
Flags.setNoUnsignedWrap(true);
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags);
continue;
}
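A worked sanity check for the two inbounds cases above (illustrative, not from this patch): an inbounds GEP stays within its underlying object, and an allocated object cannot straddle the end of the address space. So for a base pointer inside such an object and a constant offset Offs with Offs >= 0 even when read as signed, the addition cannot wrap modulo 2^64, which is exactly the guarantee the NUW flag encodes.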
@ -3092,10 +3112,13 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
Align = 0;
// Round the size of the allocation up to the stack alignment size
// by adding SA-1 to the size.
// by adding SA-1 to the size. This doesn't overflow because we're computing
// an address inside an alloca.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
AllocSize = DAG.getNode(ISD::ADD, dl,
AllocSize.getValueType(), AllocSize,
DAG.getIntPtrConstant(StackAlign - 1, dl));
DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags);
// Mask out the low bits for alignment purposes.
AllocSize = DAG.getNode(ISD::AND, dl,
@ -3168,6 +3191,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (isVolatile)
Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
// An aggregate load cannot wrap around the address space, so offsets to its
// parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
EVT PtrVT = Ptr.getValueType();
@ -3188,7 +3216,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
}
SDValue A = DAG.getNode(ISD::ADD, dl,
PtrVT, Ptr,
DAG.getConstant(Offsets[i], dl, PtrVT));
DAG.getConstant(Offsets[i], dl, PtrVT),
&Flags);
SDValue L = DAG.getLoad(ValueVTs[i], dl, Root,
A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
isNonTemporal, isInvariant, Alignment, AAInfo,
@ -3243,6 +3272,11 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
// An aggregate store cannot wrap around the address space, so offsets to its
// parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
// See visitLoad comments.
@ -3253,7 +3287,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
ChainI = 0;
}
SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
DAG.getConstant(Offsets[i], dl, PtrVT));
DAG.getConstant(Offsets[i], dl, PtrVT), &Flags);
SDValue St = DAG.getStore(Root, dl,
SDValue(Src.getNode(), Src.getResNo() + i),
Add, MachinePointerInfo(PtrV, Offsets[i]),
@ -5189,7 +5223,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::experimental_gc_relocate: {
visitGCRelocate(I);
visitGCRelocate(cast<GCRelocateInst>(I));
return nullptr;
}
case Intrinsic::instrprof_increment:
@ -7202,10 +7236,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ReturnValues.resize(NumValues);
SmallVector<SDValue, 4> Chains(NumValues);
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
for (unsigned i = 0; i < NumValues; ++i) {
SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
CLI.DAG.getConstant(Offsets[i], CLI.DL,
PtrVT));
PtrVT), &Flags);
SDValue L = CLI.DAG.getLoad(
RetTys[i], CLI.DL, CLI.Chain, Add,
MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),

View File

@ -855,7 +855,7 @@ class SelectionDAGBuilder {
// These three are implemented in StatepointLowering.cpp
void visitStatepoint(const CallInst &I);
void visitGCRelocate(const CallInst &I);
void visitGCRelocate(const GCRelocateInst &I);
void visitGCResult(const CallInst &I);
void visitUserOp1(const Instruction &I) {

View File

@ -633,6 +633,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MRI.replaceRegWith(From, To);
}
if (TLI->hasCopyImplyingStackAdjustment(MF))
MFI->setHasOpaqueSPAdjustment(true);
// Freeze the set of reserved registers now that MachineFrameInfo has been
// set up. All the information required by getReservedRegs() should be
// available now.

View File

@ -128,13 +128,11 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
return Optional<int>();
// Spill location is known for gc relocates
if (isGCRelocate(Val)) {
GCRelocateOperands RelocOps(cast<Instruction>(Val));
if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) {
FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
Builder.FuncInfo.StatepointRelocatedValues[RelocOps.getStatepoint()];
Builder.FuncInfo.StatepointRelocatedValues[Relocate->getStatepoint()];
auto It = SpillMap.find(RelocOps.getDerivedPtr());
auto It = SpillMap.find(Relocate->getDerivedPtr());
if (It == SpillMap.end())
return Optional<int>();
@ -401,10 +399,10 @@ static void getIncomingStatepointGCValues(
SmallVectorImpl<const Value *> &Bases, SmallVectorImpl<const Value *> &Ptrs,
SmallVectorImpl<const Value *> &Relocs, ImmutableStatepoint StatepointSite,
SelectionDAGBuilder &Builder) {
for (GCRelocateOperands relocateOpers : StatepointSite.getRelocates()) {
Relocs.push_back(relocateOpers.getUnderlyingCallSite().getInstruction());
Bases.push_back(relocateOpers.getBasePtr());
Ptrs.push_back(relocateOpers.getDerivedPtr());
for (const GCRelocateInst *Relocate : StatepointSite.getRelocates()) {
Relocs.push_back(Relocate);
Bases.push_back(Relocate->getBasePtr());
Ptrs.push_back(Relocate->getDerivedPtr());
}
// Remove any redundant llvm::Values which map to the same SDValue as another
@ -602,8 +600,8 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
Builder.FuncInfo.StatepointRelocatedValues[StatepointInstr];
for (GCRelocateOperands RelocateOpers : StatepointSite.getRelocates()) {
const Value *V = RelocateOpers.getDerivedPtr();
for (const GCRelocateInst *Relocate : StatepointSite.getRelocates()) {
const Value *V = Relocate->getDerivedPtr();
SDValue SDV = Builder.getValue(V);
SDValue Loc = Builder.StatepointLowering.getLocation(SDV);
@ -624,8 +622,7 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// uses of the corresponding values so that it would automatically
// export them. Relocates of the spilled values does not use original
// value.
if (RelocateOpers.getUnderlyingCallSite().getParent() !=
StatepointInstr->getParent())
if (Relocate->getParent() != StatepointInstr->getParent())
Builder.ExportFromCurrentBlock(V);
}
}
@ -656,7 +653,7 @@ void SelectionDAGBuilder::LowerStatepoint(
// statepoint.
for (const User *U : CS->users()) {
const CallInst *Call = cast<CallInst>(U);
if (isGCRelocate(Call) && Call->getParent() == CS.getParent())
if (isa<GCRelocateInst>(Call) && Call->getParent() == CS.getParent())
StatepointLowering.scheduleRelocCall(*Call);
}
#endif
@ -859,24 +856,22 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) {
}
}
void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
GCRelocateOperands RelocateOpers(&CI);
void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
#ifndef NDEBUG
// Consistency check
// We skip this check for relocates not in the same basic block as their
// statepoint. It would be too expensive to preserve validation info through
// different basic blocks.
if (RelocateOpers.getStatepoint()->getParent() == CI.getParent()) {
StatepointLowering.relocCallVisited(CI);
if (Relocate.getStatepoint()->getParent() == Relocate.getParent()) {
StatepointLowering.relocCallVisited(Relocate);
}
#endif
const Value *DerivedPtr = RelocateOpers.getDerivedPtr();
const Value *DerivedPtr = Relocate.getDerivedPtr();
SDValue SD = getValue(DerivedPtr);
FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
FuncInfo.StatepointRelocatedValues[RelocateOpers.getStatepoint()];
FuncInfo.StatepointRelocatedValues[Relocate.getStatepoint()];
// We should have recorded a location for this pointer
assert(SpillMap.count(DerivedPtr) && "Relocating not lowered gc value");
@ -885,7 +880,7 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
// We didn't need to spill these special cases (constants and allocas).
// See the handling in spillIncomingValueForStatepoint for detail.
if (!DerivedPtrLocation) {
setValue(&CI, SD);
setValue(&Relocate, SD);
return;
}
@ -907,5 +902,5 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
DAG.setRoot(SpillLoad.getValue(1));
assert(SpillLoad.getNode());
setValue(&CI, SpillLoad);
setValue(&Relocate, SpillLoad);
}

View File

@ -212,7 +212,7 @@ unsigned TargetSchedModel::computeOperandLatency(
&& !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()
&& SchedModel.isComplete()) {
errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
<< *DefMI;
<< *DefMI << " (Try with MCSchedModel.CompleteModel set to false)";
llvm_unreachable("incomplete machine model");
}
#endif

View File

@ -17,11 +17,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@ -435,11 +438,12 @@ void llvm::calculateWinCXXEHStateNumbers(const Function *Fn,
calculateStateNumbersForInvokes(Fn, FuncInfo);
}
static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int ParentState,
ClrHandlerType HandlerType, uint32_t TypeToken,
const BasicBlock *Handler) {
static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int HandlerParentState,
int TryParentState, ClrHandlerType HandlerType,
uint32_t TypeToken, const BasicBlock *Handler) {
ClrEHUnwindMapEntry Entry;
Entry.Parent = ParentState;
Entry.HandlerParentState = HandlerParentState;
Entry.TryParentState = TryParentState;
Entry.Handler = Handler;
Entry.HandlerType = HandlerType;
Entry.TypeToken = TypeToken;
@ -453,82 +457,199 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
if (!FuncInfo.EHPadStateMap.empty())
return;
SmallVector<std::pair<const Instruction *, int>, 8> Worklist;
// This numbering assigns one state number to each catchpad and cleanuppad.
// It also computes two tree-like relations over states:
// 1) Each state has a "HandlerParentState", which is the state of the next
// outer handler enclosing this state's handler (same as nearest ancestor
// per the ParentPad linkage on EH pads, but skipping over catchswitches).
// 2) Each state has a "TryParentState", which:
// a) for a catchpad that's not the last handler on its catchswitch, is
// the state of the next catchpad on that catchswitch
// b) for all other pads, is the state of the pad whose try region is the
// next outer try region enclosing this state's try region. The "try
// regions" are not present as such in the IR, but will be inferred
// based on the placement of invokes and pads which reach each other
// by exceptional exits.
// Catchswitches do not get their own states, but each gets mapped to the
// state of its first catchpad.
// Each pad needs to be able to refer to its parent, so scan the function
// looking for top-level handlers and seed the worklist with them.
// Step one: walk down from outermost to innermost funclets, assigning each
// catchpad and cleanuppad a state number. Add an entry to the
// ClrEHUnwindMap for each state, recording its HandlerParentState and
// handler attributes. Record the TryParentState as well for each catchpad
// that's not the last on its catchswitch, but initialize all other entries'
// TryParentStates to a sentinel -1 value that the next pass will update.
// Seed a worklist with pads that have no parent.
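// Hedged illustration (not from this patch): for a catchswitch with handlers
// [C1, C2] whose parent pad is a cleanuppad CL, this numbering yields
//   HandlerParentState(C1) == HandlerParentState(C2) == state(CL)
//   TryParentState(C1)     == state(C2)   (the next catch on the switch)
//   TryParentState(C2)     == -1 for now; step two fills it in from the
//                             catchswitch's unwind destination.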
SmallVector<std::pair<const Instruction *, int>, 8> Worklist;
for (const BasicBlock &BB : *Fn) {
if (!BB.isEHPad())
continue;
if (BB.isLandingPad())
report_fatal_error("CoreCLR EH cannot use landingpads");
const Instruction *FirstNonPHI = BB.getFirstNonPHI();
if (!isTopLevelPadForMSVC(FirstNonPHI))
const Value *ParentPad;
if (const auto *CPI = dyn_cast<CleanupPadInst>(FirstNonPHI))
ParentPad = CPI->getParentPad();
else if (const auto *CSI = dyn_cast<CatchSwitchInst>(FirstNonPHI))
ParentPad = CSI->getParentPad();
else
continue;
// queue this with sentinel parent state -1 to mean unwind to caller.
Worklist.emplace_back(FirstNonPHI, -1);
if (isa<ConstantTokenNone>(ParentPad))
Worklist.emplace_back(FirstNonPHI, -1);
}
// Use the worklist to visit all pads, from outer to inner. Record
// HandlerParentState for all pads. Record TryParentState only for catchpads
// that aren't the last on their catchswitch (setting all other entries'
// TryParentStates to an initial value of -1). This loop is also responsible
// for setting the EHPadStateMap entry for all catchpads, cleanuppads, and
// catchswitches.
while (!Worklist.empty()) {
const Instruction *Pad;
int ParentState;
std::tie(Pad, ParentState) = Worklist.pop_back_val();
int HandlerParentState;
std::tie(Pad, HandlerParentState) = Worklist.pop_back_val();
Value *ParentPad;
int PredState;
if (const CleanupPadInst *Cleanup = dyn_cast<CleanupPadInst>(Pad)) {
// A cleanup can have multiple exits; don't re-process after the first.
if (FuncInfo.EHPadStateMap.count(Cleanup))
continue;
// CoreCLR personality uses arity to distinguish faults from finallies.
const BasicBlock *PadBlock = Cleanup->getParent();
if (const auto *Cleanup = dyn_cast<CleanupPadInst>(Pad)) {
// Create the entry for this cleanup with the appropriate handler
// properties. Finally and fault handlers are distinguished by arity.
ClrHandlerType HandlerType =
(Cleanup->getNumOperands() ? ClrHandlerType::Fault
: ClrHandlerType::Finally);
int NewState =
addClrEHHandler(FuncInfo, ParentState, HandlerType, 0, PadBlock);
FuncInfo.EHPadStateMap[Cleanup] = NewState;
// Propagate the new state to all preds of the cleanup
ParentPad = Cleanup->getParentPad();
PredState = NewState;
} else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
SmallVector<const CatchPadInst *, 1> Handlers;
for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
const auto *Catch = cast<CatchPadInst>(CatchPadBB->getFirstNonPHI());
Handlers.push_back(Catch);
}
FuncInfo.EHPadStateMap[CatchSwitch] = ParentState;
int NewState = ParentState;
for (auto HandlerI = Handlers.rbegin(), HandlerE = Handlers.rend();
HandlerI != HandlerE; ++HandlerI) {
const CatchPadInst *Catch = *HandlerI;
const BasicBlock *PadBlock = Catch->getParent();
(Cleanup->getNumArgOperands() ? ClrHandlerType::Fault
: ClrHandlerType::Finally);
int CleanupState = addClrEHHandler(FuncInfo, HandlerParentState, -1,
HandlerType, 0, Pad->getParent());
// Queue any child EH pads on the worklist.
for (const User *U : Cleanup->users())
if (const auto *I = dyn_cast<Instruction>(U))
if (I->isEHPad())
Worklist.emplace_back(I, CleanupState);
// Remember this pad's state.
FuncInfo.EHPadStateMap[Cleanup] = CleanupState;
} else {
// Walk the handlers of this catchswitch in reverse order since all but
// the last need to set the following one as its TryParentState.
const auto *CatchSwitch = cast<CatchSwitchInst>(Pad);
int CatchState = -1, FollowerState = -1;
SmallVector<const BasicBlock *, 4> CatchBlocks(CatchSwitch->handlers());
for (auto CBI = CatchBlocks.rbegin(), CBE = CatchBlocks.rend();
CBI != CBE; ++CBI, FollowerState = CatchState) {
const BasicBlock *CatchBlock = *CBI;
// Create the entry for this catch with the appropriate handler
// properties.
const auto *Catch = cast<CatchPadInst>(CatchBlock->getFirstNonPHI());
uint32_t TypeToken = static_cast<uint32_t>(
cast<ConstantInt>(Catch->getArgOperand(0))->getZExtValue());
NewState = addClrEHHandler(FuncInfo, NewState, ClrHandlerType::Catch,
TypeToken, PadBlock);
FuncInfo.EHPadStateMap[Catch] = NewState;
CatchState =
addClrEHHandler(FuncInfo, HandlerParentState, FollowerState,
ClrHandlerType::Catch, TypeToken, CatchBlock);
// Queue any child EH pads on the worklist.
for (const User *U : Catch->users())
if (const auto *I = dyn_cast<Instruction>(U))
if (I->isEHPad())
Worklist.emplace_back(I, CatchState);
// Remember this catch's state.
FuncInfo.EHPadStateMap[Catch] = CatchState;
}
for (const auto *CatchPad : Handlers) {
for (const User *U : CatchPad->users()) {
const auto *UserI = cast<Instruction>(U);
if (UserI->isEHPad())
Worklist.emplace_back(UserI, ParentState);
}
}
PredState = NewState;
ParentPad = CatchSwitch->getParentPad();
} else {
llvm_unreachable("Unexpected EH pad");
}
// Queue all predecessors with the given state
for (const BasicBlock *Pred : predecessors(Pad->getParent())) {
if ((Pred = getEHPadFromPredecessor(Pred, ParentPad)))
Worklist.emplace_back(Pred->getFirstNonPHI(), PredState);
// Associate the catchswitch with the state of its first catch.
assert(CatchSwitch->getNumHandlers());
FuncInfo.EHPadStateMap[CatchSwitch] = CatchState;
}
}
// Step two: record the TryParentState of each state. For cleanuppads that
// don't have cleanuprets, we may need to infer this from their child pads,
// so visit pads in descendant-most to ancestor-most order.
for (auto Entry = FuncInfo.ClrEHUnwindMap.rbegin(),
End = FuncInfo.ClrEHUnwindMap.rend();
Entry != End; ++Entry) {
const Instruction *Pad =
Entry->Handler.get<const BasicBlock *>()->getFirstNonPHI();
// For most pads, the TryParentState is the state associated with the
// unwind dest of exceptional exits from it.
const BasicBlock *UnwindDest;
if (const auto *Catch = dyn_cast<CatchPadInst>(Pad)) {
// If a catch is not the last in its catchswitch, its TryParentState is
// the state associated with the next catch in the switch, even though
// that's not the unwind dest of exceptions escaping the catch. Those
// cases were already assigned a TryParentState in the first pass, so
// skip them.
if (Entry->TryParentState != -1)
continue;
// Otherwise, get the unwind dest from the catchswitch.
UnwindDest = Catch->getCatchSwitch()->getUnwindDest();
} else {
const auto *Cleanup = cast<CleanupPadInst>(Pad);
UnwindDest = nullptr;
for (const User *U : Cleanup->users()) {
if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
// Common and unambiguous case -- cleanupret indicates cleanup's
// unwind dest.
UnwindDest = CleanupRet->getUnwindDest();
break;
}
// Get an unwind dest for the user
const BasicBlock *UserUnwindDest = nullptr;
if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
UserUnwindDest = Invoke->getUnwindDest();
} else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(U)) {
UserUnwindDest = CatchSwitch->getUnwindDest();
} else if (auto *ChildCleanup = dyn_cast<CleanupPadInst>(U)) {
int UserState = FuncInfo.EHPadStateMap[ChildCleanup];
int UserUnwindState =
FuncInfo.ClrEHUnwindMap[UserState].TryParentState;
if (UserUnwindState != -1)
UserUnwindDest = FuncInfo.ClrEHUnwindMap[UserUnwindState]
.Handler.get<const BasicBlock *>();
}
// Not having an unwind dest for this user might indicate that it
// doesn't unwind, so can't be taken as proof that the cleanup itself
// may unwind to caller (see e.g. SimplifyUnreachable and
// RemoveUnwindEdge).
if (!UserUnwindDest)
continue;
// Now we have an unwind dest for the user, but we need to see if it
// unwinds all the way out of the cleanup or if it stays within it.
const Instruction *UserUnwindPad = UserUnwindDest->getFirstNonPHI();
const Value *UserUnwindParent;
if (auto *CSI = dyn_cast<CatchSwitchInst>(UserUnwindPad))
UserUnwindParent = CSI->getParentPad();
else
UserUnwindParent =
cast<CleanupPadInst>(UserUnwindPad)->getParentPad();
// The unwind stays within the cleanup iff it targets a child of the
// cleanup.
if (UserUnwindParent == Cleanup)
continue;
// This unwind exits the cleanup, so its dest is the cleanup's dest.
UnwindDest = UserUnwindDest;
break;
}
}
// Record the state of the unwind dest as the TryParentState.
int UnwindDestState;
// If UnwindDest is null at this point, either the pad in question can
// be exited by unwind to caller, or it cannot be exited by unwind. In
// either case, reporting such cases as unwinding to caller is correct.
// This can lead to EH tables that "look strange" -- if this pad is in
// a parent funclet which has other children that do unwind to an enclosing
// pad, the try region for this pad will be missing the "duplicate" EH
// clause entries that you'd expect to see covering the whole parent. That
// should be benign, since the unwind never actually happens. If it were
// an issue, we could add a subsequent pass that pushes unwind dests down
// from parents that have them to children that appear to unwind to caller.
if (!UnwindDest) {
UnwindDestState = -1;
} else {
UnwindDestState = FuncInfo.EHPadStateMap[UnwindDest->getFirstNonPHI()];
}
Entry->TryParentState = UnwindDestState;
}
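// Hedged walk-through of the inference above (not from this patch): take a
// cleanuppad CL with no cleanupret whose only exceptional exit is an invoke
// funclet-bound to CL. If the invoke's unwind dest is a pad whose parent is
// CL, the unwind stays inside CL and the remaining users are scanned;
// otherwise that dest is CL's unwind dest, and TryParentState(CL) becomes
// the state recorded for its first non-PHI pad.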
// Step three: transfer information from pads to invokes.
calculateStateNumbersForInvokes(Fn, FuncInfo);
}
@ -597,6 +718,11 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
for (auto &Funclets : FuncletBlocks) {
BasicBlock *FuncletPadBB = Funclets.first;
std::vector<BasicBlock *> &BlocksInFunclet = Funclets.second;
Value *FuncletToken;
if (FuncletPadBB == &F.getEntryBlock())
FuncletToken = ConstantTokenNone::get(F.getContext());
else
FuncletToken = FuncletPadBB->getFirstNonPHI();
std::vector<std::pair<BasicBlock *, BasicBlock *>> Orig2Clone;
ValueToValueMapTy VMap;
@ -668,15 +794,44 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
RemapInstruction(&I, VMap,
RF_IgnoreMissingEntries | RF_NoModuleLevelChanges);
// Catchrets targeting cloned blocks need to be updated separately from
// the loop above because they are not in the current funclet.
SmallVector<CatchReturnInst *, 2> FixupCatchrets;
for (auto &BBMapping : Orig2Clone) {
BasicBlock *OldBlock = BBMapping.first;
BasicBlock *NewBlock = BBMapping.second;
FixupCatchrets.clear();
for (BasicBlock *Pred : predecessors(OldBlock))
if (auto *CatchRet = dyn_cast<CatchReturnInst>(Pred->getTerminator()))
if (CatchRet->getParentPad() == FuncletToken)
FixupCatchrets.push_back(CatchRet);
for (CatchReturnInst *CatchRet : FixupCatchrets)
CatchRet->setSuccessor(NewBlock);
}
auto UpdatePHIOnClonedBlock = [&](PHINode *PN, bool IsForOldBlock) {
unsigned NumPreds = PN->getNumIncomingValues();
for (unsigned PredIdx = 0, PredEnd = NumPreds; PredIdx != PredEnd;
++PredIdx) {
BasicBlock *IncomingBlock = PN->getIncomingBlock(PredIdx);
ColorVector &IncomingColors = BlockColors[IncomingBlock];
bool BlockInFunclet = IncomingColors.size() == 1 &&
IncomingColors.front() == FuncletPadBB;
if (IsForOldBlock != BlockInFunclet)
bool EdgeTargetsFunclet;
if (auto *CRI =
dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) {
EdgeTargetsFunclet = (CRI->getParentPad() == FuncletToken);
} else {
ColorVector &IncomingColors = BlockColors[IncomingBlock];
assert(!IncomingColors.empty() && "Block not colored!");
assert((IncomingColors.size() == 1 ||
llvm::all_of(IncomingColors,
[&](BasicBlock *Color) {
return Color != FuncletPadBB;
})) &&
"Cloning should leave this funclet's blocks monochromatic");
EdgeTargetsFunclet = (IncomingColors.front() == FuncletPadBB);
}
if (IsForOldBlock != EdgeTargetsFunclet)
continue;
PN->removeIncomingValue(IncomingBlock, /*DeletePHIIfEmpty=*/false);
// Revisit the next entry.
@ -864,7 +1019,6 @@ void WinEHPrepare::cleanupPreparedFunclets(Function &F) {
}
void WinEHPrepare::verifyPreparedFunclets(Function &F) {
// Recolor the CFG to verify that all is well.
for (BasicBlock &BB : F) {
size_t NumColors = BlockColors[&BB].size();
assert(NumColors == 1 && "Expected monochromatic BB!");
@ -872,12 +1026,8 @@ void WinEHPrepare::verifyPreparedFunclets(Function &F) {
report_fatal_error("Uncolored BB!");
if (NumColors > 1)
report_fatal_error("Multicolor BB!");
if (!DisableDemotion) {
bool EHPadHasPHI = BB.isEHPad() && isa<PHINode>(BB.begin());
assert(!EHPadHasPHI && "EH Pad still has a PHI!");
if (EHPadHasPHI)
report_fatal_error("EH Pad still has a PHI!");
}
assert((DisableDemotion || !(BB.isEHPad() && isa<PHINode>(BB.begin()))) &&
"EH Pad still has a PHI!");
}
}
@ -896,12 +1046,17 @@ bool WinEHPrepare::prepareExplicitEH(Function &F) {
demotePHIsOnFunclets(F);
if (!DisableCleanups) {
DEBUG(verifyFunction(F));
removeImplausibleInstructions(F);
DEBUG(verifyFunction(F));
cleanupPreparedFunclets(F);
}
verifyPreparedFunclets(F);
DEBUG(verifyPreparedFunclets(F));
// Recolor the CFG to verify that all is well.
DEBUG(colorFunclets(F));
DEBUG(verifyPreparedFunclets(F));
BlockColors.clear();
FuncletBlocks.clear();

View File

@ -2060,7 +2060,7 @@ class AssemblyWriter {
// printGCRelocateComment - print comment after call to the gc.relocate
// intrinsic indicating base and derived pointer names.
void printGCRelocateComment(const Value &V);
void printGCRelocateComment(const GCRelocateInst &Relocate);
};
} // namespace
@ -2722,14 +2722,11 @@ void AssemblyWriter::printInstructionLine(const Instruction &I) {
/// printGCRelocateComment - print comment after call to the gc.relocate
/// intrinsic indicating base and derived pointer names.
void AssemblyWriter::printGCRelocateComment(const Value &V) {
assert(isGCRelocate(&V));
GCRelocateOperands GCOps(cast<Instruction>(&V));
void AssemblyWriter::printGCRelocateComment(const GCRelocateInst &Relocate) {
Out << " ; (";
writeOperand(GCOps.getBasePtr(), false);
writeOperand(Relocate.getBasePtr(), false);
Out << ", ";
writeOperand(GCOps.getDerivedPtr(), false);
writeOperand(Relocate.getDerivedPtr(), false);
Out << ")";
}
@ -2737,8 +2734,8 @@ void AssemblyWriter::printGCRelocateComment(const Value &V) {
/// which slot it occupies.
///
void AssemblyWriter::printInfoComment(const Value &V) {
if (isGCRelocate(&V))
printGCRelocateComment(V);
if (const auto *Relocate = dyn_cast<GCRelocateInst>(&V))
printGCRelocateComment(*Relocate);
if (AnnotationWriter)
AnnotationWriter->printInfoComment(V, Out);

View File

@ -641,14 +641,15 @@ AttributeSet AttributeSet::get(LLVMContext &C,
if (Attrs.empty())
return AttributeSet();
#ifndef NDEBUG
for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
assert((!i || Attrs[i-1].first <= Attrs[i].first) &&
"Misordered Attributes list!");
assert(!Attrs[i].second.hasAttribute(Attribute::None) &&
"Pointless attribute!");
}
#endif
assert(std::is_sorted(Attrs.begin(), Attrs.end(),
[](const std::pair<unsigned, Attribute> &LHS,
const std::pair<unsigned, Attribute> &RHS) {
return LHS.first < RHS.first;
}) && "Misordered Attributes list!");
assert(std::none_of(Attrs.begin(), Attrs.end(),
[](const std::pair<unsigned, Attribute> &Pair) {
return Pair.second.hasAttribute(Attribute::None);
}) && "Pointless attribute!");
// Create a vector of (unsigned, AttributeSetNode*) pairs from the attributes
// list.

View File

@ -76,22 +76,21 @@ iplist<Instruction>::iterator Instruction::eraseFromParent() {
return getParent()->getInstList().erase(getIterator());
}
/// insertBefore - Insert an unlinked instructions into a basic block
/// immediately before the specified instruction.
/// Insert an unlinked instruction into a basic block immediately before the
/// specified instruction.
void Instruction::insertBefore(Instruction *InsertPos) {
InsertPos->getParent()->getInstList().insert(InsertPos->getIterator(), this);
}
/// insertAfter - Insert an unlinked instructions into a basic block
/// immediately after the specified instruction.
/// Insert an unlinked instruction into a basic block immediately after the
/// specified instruction.
void Instruction::insertAfter(Instruction *InsertPos) {
InsertPos->getParent()->getInstList().insertAfter(InsertPos->getIterator(),
this);
}
/// moveBefore - Unlink this instruction from its current basic block and
/// insert it into the basic block that MovePos lives in, right before
/// MovePos.
/// Unlink this instruction from its current basic block and insert it into the
/// basic block that MovePos lives in, right before MovePos.
void Instruction::moveBefore(Instruction *MovePos) {
MovePos->getParent()->getInstList().splice(
MovePos->getIterator(), getParent()->getInstList(), getIterator());

View File

@ -609,20 +609,6 @@ void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) {
return setSuccessor(idx, B);
}
bool InvokeInst::hasFnAttrImpl(Attribute::AttrKind A) const {
if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
return true;
// Operand bundles override attributes on the called function, but don't
// override attributes directly present on the invoke instruction.
if (isFnAttrDisallowedByOpBundle(A))
return false;
if (const Function *F = getCalledFunction())
return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A);
return false;
}
bool InvokeInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const {
assert(i < (getNumArgOperands() + 1) && "Param index out of bounds!");
@ -934,6 +920,17 @@ void CatchSwitchInst::addHandler(BasicBlock *Handler) {
getOperandList()[OpNo] = Handler;
}
void CatchSwitchInst::removeHandler(handler_iterator HI) {
// Move all subsequent handlers up one.
Use *EndDst = op_end() - 1;
for (Use *CurDst = HI.getCurrent(); CurDst != EndDst; ++CurDst)
*CurDst = *(CurDst + 1);
// Null out the last handler use.
*EndDst = nullptr;
setNumHungOffUseOperands(getNumOperands() - 1);
}
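A minimal usage sketch for the new removeHandler (illustrative, not from this patch; CS and DeadBB are hypothetical):

  // Drop the handler that targets DeadBB; later handlers shift up one slot.
  for (auto HI = CS->handler_begin(), HE = CS->handler_end(); HI != HE; ++HI) {
    if (*HI == DeadBB) {
      CS->removeHandler(HI); // the handler list shrank, so stop iterating
      break;
    }
  }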
BasicBlock *CatchSwitchInst::getSuccessorV(unsigned idx) const {
return getSuccessor(idx);
}

View File

@ -190,6 +190,8 @@ void ReplaceableMetadataImpl::moveRef(void *Ref, void *New,
void ReplaceableMetadataImpl::replaceAllUsesWith(Metadata *MD) {
assert(!(MD && isa<MDNode>(MD) && cast<MDNode>(MD)->isTemporary()) &&
"Expected non-temp node");
assert(CanReplace &&
"Attempted to replace Metadata marked for no replacement");
if (UseMap.empty())
return;
@ -555,7 +557,7 @@ void MDNode::decrementUnresolvedOperandCount() {
resolve();
}
void MDNode::resolveCycles(bool MDMaterialized) {
void MDNode::resolveCycles(bool AllowTemps) {
if (isResolved())
return;
@ -568,7 +570,7 @@ void MDNode::resolveCycles(bool MDMaterialized) {
if (!N)
continue;
if (N->isTemporary() && !MDMaterialized)
if (N->isTemporary() && AllowTemps)
continue;
assert(!N->isTemporary() &&
"Expected all forward declarations to be resolved");

View File

@ -40,20 +40,7 @@ bool llvm::isStatepoint(const Value &inst) {
}
bool llvm::isGCRelocate(const ImmutableCallSite &CS) {
if (!CS.getInstruction()) {
// This is not a call site
return false;
}
return isGCRelocate(CS.getInstruction());
}
bool llvm::isGCRelocate(const Value *inst) {
if (const CallInst *call = dyn_cast<CallInst>(inst)) {
if (const Function *F = call->getCalledFunction()) {
return F->getIntrinsicID() == Intrinsic::experimental_gc_relocate;
}
}
return false;
return CS.getInstruction() && isa<GCRelocateInst>(CS.getInstruction());
}
bool llvm::isGCResult(const ImmutableCallSite &CS) {

View File

@ -1657,14 +1657,14 @@ void Verifier::VerifyStatepoint(ImmutableCallSite CS) {
const CallInst *Call = dyn_cast<const CallInst>(U);
Assert(Call, "illegal use of statepoint token", &CI, U);
if (!Call) continue;
Assert(isGCRelocate(Call) || isGCResult(Call),
Assert(isa<GCRelocateInst>(Call) || isGCResult(Call),
"gc.result or gc.relocate are the only value uses"
"of a gc.statepoint",
&CI, U);
if (isGCResult(Call)) {
Assert(Call->getArgOperand(0) == &CI,
"gc.result connected to wrong gc.statepoint", &CI, Call);
} else if (isGCRelocate(Call)) {
} else if (isa<GCRelocateInst>(Call)) {
Assert(Call->getArgOperand(0) == &CI,
"gc.relocate connected to wrong gc.statepoint", &CI, Call);
}
@ -3019,8 +3019,7 @@ void Verifier::visitCleanupPadInst(CleanupPadInst &CPI) {
&CPI);
auto *ParentPad = CPI.getParentPad();
Assert(isa<CatchSwitchInst>(ParentPad) || isa<ConstantTokenNone>(ParentPad) ||
isa<CleanupPadInst>(ParentPad) || isa<CatchPadInst>(ParentPad),
Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad),
"CleanupPadInst has an invalid parent.", &CPI);
User *FirstUser = nullptr;
@ -3077,10 +3076,17 @@ void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) {
}
auto *ParentPad = CatchSwitch.getParentPad();
Assert(isa<CatchSwitchInst>(ParentPad) || isa<ConstantTokenNone>(ParentPad) ||
isa<CleanupPadInst>(ParentPad) || isa<CatchPadInst>(ParentPad),
Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad),
"CatchSwitchInst has an invalid parent.", ParentPad);
Assert(CatchSwitch.getNumHandlers() != 0,
"CatchSwitchInst cannot have empty handler list", &CatchSwitch);
for (BasicBlock *Handler : CatchSwitch.handlers()) {
Assert(isa<CatchPadInst>(Handler->getFirstNonPHI()),
"CatchSwitchInst handlers must be catchpads", &CatchSwitch, Handler);
}
visitTerminatorInst(CatchSwitch);
}
@ -3675,8 +3681,8 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
// Verify rest of the relocate arguments
GCRelocateOperands Ops(CS);
ImmutableCallSite StatepointCS(Ops.getStatepoint());
ImmutableCallSite StatepointCS(
cast<GCRelocateInst>(*CS.getInstruction()).getStatepoint());
// Both the base and derived must be piped through the safepoint
Value* Base = CS.getArgOperand(1);
@ -3731,14 +3737,14 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
// Relocated value must be a pointer type, but gc_relocate does not need to return the
// same pointer type as the relocated pointer. It can be cast to the correct type later
// if it's desired. However, they must have the same address space.
GCRelocateOperands Operands(CS);
Assert(Operands.getDerivedPtr()->getType()->isPointerTy(),
GCRelocateInst &Relocate = cast<GCRelocateInst>(*CS.getInstruction());
Assert(Relocate.getDerivedPtr()->getType()->isPointerTy(),
"gc.relocate: relocated value must be a gc pointer", CS);
// gc_relocate return type must be a pointer type, and is verified earlier in
// VerifyIntrinsicType().
Assert(cast<PointerType>(CS.getType())->getAddressSpace() ==
cast<PointerType>(Operands.getDerivedPtr()->getType())->getAddressSpace(),
cast<PointerType>(Relocate.getDerivedPtr()->getType())->getAddressSpace(),
"gc.relocate: relocating a pointer shouldn't change its address space", CS);
break;
}

View File

@ -524,6 +524,23 @@ class IRLinker {
ValueMapperFlags = ValueMapperFlags | RF_HaveUnmaterializedMetadata;
}
~IRLinker() {
// In the case where we are not linking metadata, we unset the CanReplace
// flag on all temporary metadata in the MetadataToIDs map to ensure
// none was replaced while being a map key. Now that we are destructing
// the map, set the flag back to true, so that it is replaceable during
// metadata linking.
if (!shouldLinkMetadata()) {
for (auto MDI : MetadataToIDs) {
Metadata *MD = const_cast<Metadata *>(MDI.first);
MDNode *Node = dyn_cast<MDNode>(MD);
assert((Node && Node->isTemporary()) &&
"Found non-temp metadata in map when not linking metadata");
Node->setCanReplace(true);
}
}
}
bool run();
Value *materializeDeclFor(Value *V, bool ForAlias);
void materializeInitFor(GlobalValue *New, GlobalValue *Old, bool ForAlias);
@ -1111,7 +1128,8 @@ bool IRLinker::linkFunctionBody(Function &Dst, Function &Src) {
// a function and before remapping metadata on instructions below
// in RemapInstruction, as the saved mapping is used to handle
// the temporary metadata hanging off instructions.
SrcM.getMaterializer()->saveMetadataList(MetadataToIDs, true);
SrcM.getMaterializer()->saveMetadataList(MetadataToIDs,
/* OnlyTempMD = */ true);
// Link in the prefix data.
if (Src.hasPrefixData())
@ -1514,7 +1532,8 @@ bool IRLinker::run() {
// Ensure metadata materialized
if (SrcM.getMaterializer()->materializeMetadata())
return true;
SrcM.getMaterializer()->saveMetadataList(MetadataToIDs, false);
SrcM.getMaterializer()->saveMetadataList(MetadataToIDs,
/* OnlyTempMD = */ false);
}
linkNamedMDNodes();

View File

@ -514,13 +514,13 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) {
MCOS->EmitULEB128IntValue(1);
MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit);
MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1);
EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list,
context.getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
: dwarf::DW_FORM_data4);
EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, context.getDwarfVersion() >= 4
? dwarf::DW_FORM_sec_offset
: dwarf::DW_FORM_data4);
if (context.getGenDwarfSectionSyms().size() > 1 &&
context.getDwarfVersion() >= 3) {
EmitAbbrev(MCOS, dwarf::DW_AT_ranges,
context.getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
EmitAbbrev(MCOS, dwarf::DW_AT_ranges, context.getDwarfVersion() >= 4
? dwarf::DW_FORM_sec_offset
: dwarf::DW_FORM_data4);
} else {
EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);

View File

@ -1,4 +1,4 @@
//===-- MObjectFileInfo.cpp - Object File Information ---------------------===//
//===-- MCObjectFileInfo.cpp - Object File Information --------------------===//
//
// The LLVM Compiler Infrastructure
//

View File

@ -63,31 +63,30 @@ FeatureBitset MCSubtargetInfo::ToggleFeature(const FeatureBitset &FB) {
/// ToggleFeature - Toggle a feature and return the re-computed feature
/// bits. This version will also change all implied bits.
FeatureBitset MCSubtargetInfo::ToggleFeature(StringRef FS) {
SubtargetFeatures Features;
FeatureBits = Features.ToggleFeature(FeatureBits, FS, ProcFeatures);
SubtargetFeatures::ToggleFeature(FeatureBits, FS, ProcFeatures);
return FeatureBits;
}
FeatureBitset MCSubtargetInfo::ApplyFeatureFlag(StringRef FS) {
SubtargetFeatures Features;
FeatureBits = Features.ApplyFeatureFlag(FeatureBits, FS, ProcFeatures);
SubtargetFeatures::ApplyFeatureFlag(FeatureBits, FS, ProcFeatures);
return FeatureBits;
}
const MCSchedModel &MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
assert(ProcSchedModels && "Processor machine model not available!");
size_t NumProcs = ProcDesc.size();
assert(std::is_sorted(ProcSchedModels, ProcSchedModels+NumProcs,
ArrayRef<SubtargetInfoKV> SchedModels(ProcSchedModels, ProcDesc.size());
assert(std::is_sorted(SchedModels.begin(), SchedModels.end(),
[](const SubtargetInfoKV &LHS, const SubtargetInfoKV &RHS) {
return strcmp(LHS.Key, RHS.Key) < 0;
}) &&
"Processor machine model table is not sorted");
// Find entry
const SubtargetInfoKV *Found =
std::lower_bound(ProcSchedModels, ProcSchedModels+NumProcs, CPU);
if (Found == ProcSchedModels+NumProcs || StringRef(Found->Key) != CPU) {
auto Found =
std::lower_bound(SchedModels.begin(), SchedModels.end(), CPU);
if (Found == SchedModels.end() || StringRef(Found->Key) != CPU) {
if (CPU != "help") // Don't error if the user asked for help.
errs() << "'" << CPU
<< "' is not a recognized processor for this target"

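The rewrite replaces raw pointer arithmetic with an ArrayRef and standard algorithms; the table must stay sorted by key for std::lower_bound to be valid. A self-contained sketch of the same sorted-table lookup (KV is an illustrative stand-in for SubtargetInfoKV):

#include <algorithm>
#include <cstring>

struct KV { const char *Key; int Value; };

// lower_bound needs an "element < key" predicate consistent with the sort order.
static bool keyLess(const KV &E, const char *K) { return std::strcmp(E.Key, K) < 0; }

const KV *lookup(const KV *Begin, const KV *End, const char *K) {
  const KV *Found = std::lower_bound(Begin, End, K, keyLess);
  return (Found != End && std::strcmp(Found->Key, K) == 0) ? Found : nullptr;
}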
View File

@ -160,10 +160,9 @@ void ClearImpliedBits(FeatureBitset &Bits,
}
}
/// ToggleFeature - Toggle a feature and returns the newly updated feature
/// bits.
FeatureBitset
SubtargetFeatures::ToggleFeature(FeatureBitset Bits, StringRef Feature,
/// ToggleFeature - Toggle a feature and update the feature bits.
void
SubtargetFeatures::ToggleFeature(FeatureBitset &Bits, StringRef Feature,
ArrayRef<SubtargetFeatureKV> FeatureTable) {
// Find feature in table.
@ -186,12 +185,9 @@ SubtargetFeatures::ToggleFeature(FeatureBitset Bits, StringRef Feature,
<< "' is not a recognized feature for this target"
<< " (ignoring feature)\n";
}
return Bits;
}
FeatureBitset
SubtargetFeatures::ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature,
void SubtargetFeatures::ApplyFeatureFlag(FeatureBitset &Bits, StringRef Feature,
ArrayRef<SubtargetFeatureKV> FeatureTable) {
assert(hasFlag(Feature));
@ -203,7 +199,7 @@ SubtargetFeatures::ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature,
if (FeatureEntry) {
// Enable/disable feature in bits
if (isEnabled(Feature)) {
Bits |= FeatureEntry->Value;
Bits |= FeatureEntry->Value;
// For each feature that this implies, set it.
SetImpliedBits(Bits, FeatureEntry, FeatureTable);
@ -218,8 +214,6 @@ SubtargetFeatures::ApplyFeatureFlag(FeatureBitset Bits, StringRef Feature,
<< "' is not a recognized feature for this target"
<< " (ignoring feature)\n";
}
return Bits;
}
@ -234,14 +228,10 @@ SubtargetFeatures::getFeatureBits(StringRef CPU,
return FeatureBitset();
#ifndef NDEBUG
for (size_t i = 1, e = CPUTable.size(); i != e; ++i) {
assert(strcmp(CPUTable[i - 1].Key, CPUTable[i].Key) < 0 &&
"CPU table is not sorted");
}
for (size_t i = 1, e = FeatureTable.size(); i != e; ++i) {
assert(strcmp(FeatureTable[i - 1].Key, FeatureTable[i].Key) < 0 &&
"CPU features table is not sorted");
}
assert(std::is_sorted(std::begin(CPUTable), std::end(CPUTable)) &&
"CPU table is not sorted");
assert(std::is_sorted(std::begin(FeatureTable), std::end(FeatureTable)) &&
"CPU features table is not sorted");
#endif
// Resulting bits
FeatureBitset Bits;
@ -277,7 +267,7 @@ SubtargetFeatures::getFeatureBits(StringRef CPU,
if (Feature == "+help")
Help(CPUTable, FeatureTable);
Bits = ApplyFeatureFlag(Bits, Feature, FeatureTable);
ApplyFeatureFlag(Bits, Feature, FeatureTable);
}
return Bits;
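Collapsing the hand-rolled sortedness loops into std::is_sorted needs only an operator< on the table entries. A minimal, self-contained equivalent of the debug-only check:

#include <algorithm>
#include <cassert>
#include <cstring>

struct FeatureKV {
  const char *Key;
  bool operator<(const FeatureKV &Other) const {
    return std::strcmp(Key, Other.Key) < 0;
  }
};

void checkSorted(const FeatureKV *Begin, const FeatureKV *End) {
  // Matches the NDEBUG-guarded asserts above: the tables must be sorted by Key.
  assert(std::is_sorted(Begin, End) && "feature table is not sorted");
  (void)Begin; (void)End; // no-op in release builds
}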

View File

@ -316,12 +316,17 @@ static std::error_code readCoverageMappingData(
// Read the records in the coverage data section.
for (const char *Buf = Data.data(), *End = Buf + Data.size(); Buf < End;) {
if (Buf + 4 * sizeof(uint32_t) > End)
if (Buf + sizeof(CovMapHeader) > End)
return coveragemap_error::malformed;
uint32_t NRecords = endian::readNext<uint32_t, Endian, unaligned>(Buf);
uint32_t FilenamesSize = endian::readNext<uint32_t, Endian, unaligned>(Buf);
uint32_t CoverageSize = endian::readNext<uint32_t, Endian, unaligned>(Buf);
uint32_t Version = endian::readNext<uint32_t, Endian, unaligned>(Buf);
auto CovHeader = reinterpret_cast<const coverage::CovMapHeader *>(Buf);
uint32_t NRecords =
endian::byte_swap<uint32_t, Endian>(CovHeader->NRecords);
uint32_t FilenamesSize =
endian::byte_swap<uint32_t, Endian>(CovHeader->FilenamesSize);
uint32_t CoverageSize =
endian::byte_swap<uint32_t, Endian>(CovHeader->CoverageSize);
uint32_t Version = endian::byte_swap<uint32_t, Endian>(CovHeader->Version);
Buf = reinterpret_cast<const char *>(++CovHeader);
switch (Version) {
case CoverageMappingVersion1:

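Reading the header through a struct pointer plus endian::byte_swap keeps the field list in one place and makes the bounds check track the real header size rather than a hard-coded 4 * sizeof(uint32_t). A standalone sketch of the pattern (CovMapHeaderSketch is a stand-in for coverage::CovMapHeader; the byte swap is elided for a host-endian buffer):

#include <cstdint>
#include <cstring>

struct CovMapHeaderSketch {
  uint32_t NRecords;
  uint32_t FilenamesSize;
  uint32_t CoverageSize;
  uint32_t Version;
};

// Returns false on a truncated (malformed) header; advances Buf past it.
bool readHeader(const char *&Buf, const char *End, CovMapHeaderSketch &Out) {
  if (Buf + sizeof(CovMapHeaderSketch) > End)
    return false;
  std::memcpy(&Out, Buf, sizeof(Out)); // byte_swap each field here if needed
  Buf += sizeof(CovMapHeaderSketch);
  return true;
}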
View File

@ -12,12 +12,15 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
@ -162,6 +165,98 @@ GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName) {
return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), FuncName);
}
int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
bool doCompression, std::string &Result) {
uint8_t Header[16], *P = Header;
std::string UncompressedNameStrings =
join(NameStrs.begin(), NameStrs.end(), StringRef(" "));
unsigned EncLen = encodeULEB128(UncompressedNameStrings.length(), P);
P += EncLen;
auto WriteStringToResult = [&](size_t CompressedLen,
const std::string &InputStr) {
EncLen = encodeULEB128(CompressedLen, P);
P += EncLen;
char *HeaderStr = reinterpret_cast<char *>(&Header[0]);
unsigned HeaderLen = P - &Header[0];
Result.append(HeaderStr, HeaderLen);
Result += InputStr;
return 0;
};
if (!doCompression)
return WriteStringToResult(0, UncompressedNameStrings);
SmallVector<char, 128> CompressedNameStrings;
zlib::Status Success =
zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings,
zlib::BestSizeCompression);
if (Success != zlib::StatusOK)
return 1;
return WriteStringToResult(
CompressedNameStrings.size(),
std::string(CompressedNameStrings.data(), CompressedNameStrings.size()));
}
StringRef getPGOFuncNameInitializer(GlobalVariable *NameVar) {
auto *Arr = cast<ConstantDataArray>(NameVar->getInitializer());
StringRef NameStr =
Arr->isCString() ? Arr->getAsCString() : Arr->getAsString();
return NameStr;
}
int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
std::string &Result) {
std::vector<std::string> NameStrs;
for (auto *NameVar : NameVars) {
NameStrs.push_back(getPGOFuncNameInitializer(NameVar));
}
return collectPGOFuncNameStrings(NameStrs, zlib::isAvailable(), Result);
}
int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
const uint8_t *P = reinterpret_cast<const uint8_t *>(NameStrings.data());
const uint8_t *EndP = reinterpret_cast<const uint8_t *>(NameStrings.data() +
NameStrings.size());
while (P < EndP) {
uint32_t N;
uint64_t UncompressedSize = decodeULEB128(P, &N);
P += N;
uint64_t CompressedSize = decodeULEB128(P, &N);
P += N;
bool isCompressed = (CompressedSize != 0);
SmallString<128> UncompressedNameStrings;
StringRef NameStrings;
if (isCompressed) {
StringRef CompressedNameStrings(reinterpret_cast<const char *>(P),
CompressedSize);
if (zlib::uncompress(CompressedNameStrings, UncompressedNameStrings,
UncompressedSize) != zlib::StatusOK)
return 1;
P += CompressedSize;
NameStrings = StringRef(UncompressedNameStrings.data(),
UncompressedNameStrings.size());
} else {
NameStrings =
StringRef(reinterpret_cast<const char *>(P), UncompressedSize);
P += UncompressedSize;
}
// Now parse the name strings.
SmallVector<StringRef, 0> Names;
NameStrings.split(Names, ' ');
for (StringRef &Name : Names)
Symtab.addFuncName(Name);
while (P < EndP && *P == 0)
P++;
}
Symtab.finalizeSymtab();
return 0;
}
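Each block in the name-strings payload is therefore two ULEB128 lengths followed by the space-separated names, with a compressed size of zero marking an uncompressed block. A minimal encoder for the uncompressed case, with a hand-rolled ULEB128 append standing in for llvm::encodeULEB128:

#include <cstdint>
#include <string>

static void appendULEB128(std::string &Out, uint64_t V) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V)
      Byte |= 0x80; // more bytes follow
    Out.push_back(static_cast<char>(Byte));
  } while (V);
}

// Layout: <uncompressed size><compressed size = 0><payload>.
std::string encodeUncompressedNames(const std::string &SpaceSeparatedNames) {
  std::string Result;
  appendULEB128(Result, SpaceSeparatedNames.size());
  appendULEB128(Result, 0);
  Result += SpaceSeparatedNames;
  return Result;
}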
instrprof_error
InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
uint64_t Weight) {

View File

@ -446,7 +446,7 @@ llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents,
return EC;
}
bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, ArrayRef<const char*> Args) {
static long ArgMax = sysconf(_SC_ARG_MAX);
// System says no practical limit.
@ -456,7 +456,7 @@ bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
// Conservatively account for space required by environment variables.
long HalfArgMax = ArgMax / 2;
size_t ArgLength = 0;
size_t ArgLength = Program.size() + 1;
for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end();
I != E; ++I) {
ArgLength += strlen(*I) + 1;

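The renamed check now charges the program path itself against the budget: every string costs its length plus one byte for a separator or terminator, and the total must fit in half of ARG_MAX to leave room for the environment. A distilled, self-contained version of the Unix-side logic (argMax stands in for the cached sysconf(_SC_ARG_MAX) value):

#include <cstring>
#include <vector>

bool fitsWithinLimits(const char *Program,
                      const std::vector<const char *> &Args, long argMax) {
  if (argMax <= 0)
    return true; // system reports no practical limit
  long Half = argMax / 2; // conservatively reserve half for the environment
  size_t Len = std::strlen(Program) + 1;
  for (const char *A : Args)
    Len += std::strlen(A) + 1;
  return Len <= static_cast<size_t>(Half);
}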
View File

@ -535,14 +535,15 @@ llvm::sys::writeFileWithEncoding(StringRef FileName, StringRef Contents,
return EC;
}
bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
bool llvm::sys::commandLineFitsWithinSystemLimits(StringRef Program, ArrayRef<const char*> Args) {
// The documented max length of the command line passed to CreateProcess.
static const size_t MaxCommandStringLength = 32768;
size_t ArgLength = 0;
// Account for the trailing space for the program path and the
// trailing NULL of the last argument.
size_t ArgLength = ArgLenWithQuotes(Program.str().c_str()) + 2;
for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end();
I != E; ++I) {
// Account for the trailing space for every arg but the last one and the
// trailing NULL of the last argument.
// Account for the trailing space for every arg
ArgLength += ArgLenWithQuotes(*I) + 1;
if (ArgLength > MaxCommandStringLength) {
return false;

View File

@ -30,6 +30,9 @@
#define _WIN32_WINNT 0x0601
#define _WIN32_IE 0x0800 // MinGW at it again. FIXME: verify if still needed.
#define WIN32_LEAN_AND_MEAN
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@ -44,6 +47,21 @@
#include <string>
#include <vector>
#if !defined(__CYGWIN__) && !defined(__MINGW32__)
#include <VersionHelpers.h>
#else
// Cygwin does not have the IsWindows8OrGreater() API.
// Some versions of MinGW do not have the API either.
inline bool IsWindows8OrGreater() {
OSVERSIONINFO osvi = {};
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
if (!::GetVersionEx(&osvi))
return false;
return (osvi.dwMajorVersion > 6 ||
(osvi.dwMajorVersion == 6 && osvi.dwMinorVersion >= 2));
}
#endif // __CYGWIN__
inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
if (!ErrMsg)
return true;

View File

@ -57,6 +57,10 @@
#endif
#endif
#ifdef LLVM_ON_WIN32
#include "Windows/WindowsSupport.h"
#endif
using namespace llvm;
raw_ostream::~raw_ostream() {
@ -567,8 +571,21 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
assert(FD >= 0 && "File already closed.");
pos += Size;
#ifndef LLVM_ON_WIN32
bool ShouldWriteInChunks = false;
#else
// Writing a large amount of output to the Windows console fails with ENOMEM.
// It seems that, prior to Windows 8, WriteFile() redirects to WriteConsole(),
// and the latter has a size limit (66000 bytes or less, depending on heap
// usage).
bool ShouldWriteInChunks = !!::_isatty(FD) && !IsWindows8OrGreater();
#endif
do {
ssize_t ret = ::write(FD, Ptr, Size);
size_t ChunkSize = Size;
if (ChunkSize > 32767 && ShouldWriteInChunks)
ChunkSize = 32767;
ssize_t ret = ::write(FD, Ptr, ChunkSize);
if (ret < 0) {
// If it's a recoverable error, swallow it and retry the write.

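The chunking only engages when writing to a console on pre-Windows-8 systems, where the WriteConsole path rejects large buffers; everywhere else each attempt stays a single full-size write. A standalone POSIX-flavored sketch of the loop shape (retry logic for recoverable errors is elided):

#include <cstddef>
#include <unistd.h>

void writeChunked(int FD, const char *Ptr, size_t Size, bool InChunks) {
  while (Size > 0) {
    size_t Chunk = (InChunks && Size > 32767) ? 32767 : Size;
    ssize_t Ret = ::write(FD, Ptr, Chunk);
    if (Ret < 0)
      return; // the real code distinguishes recoverable errors and retries
    Ptr += Ret;
    Size -= static_cast<size_t>(Ret);
  }
}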
View File

@ -722,7 +722,7 @@ Init *UnOpInit::resolveReferences(Record &R, const RecordVal *RV) const {
std::string UnOpInit::getAsString() const {
std::string Result;
switch (Opc) {
switch (getOpcode()) {
case CAST: Result = "!cast<" + getType()->getAsString() + ">"; break;
case HEAD: Result = "!head"; break;
case TAIL: Result = "!tail"; break;
@ -850,7 +850,7 @@ Init *BinOpInit::resolveReferences(Record &R, const RecordVal *RV) const {
std::string BinOpInit::getAsString() const {
std::string Result;
switch (Opc) {
switch (getOpcode()) {
case CONCAT: Result = "!con"; break;
case ADD: Result = "!add"; break;
case AND: Result = "!and"; break;
@ -1054,7 +1054,7 @@ Init *TernOpInit::resolveReferences(Record &R,
const RecordVal *RV) const {
Init *lhs = LHS->resolveReferences(R, RV);
if (Opc == IF && lhs != LHS) {
if (getOpcode() == IF && lhs != LHS) {
IntInit *Value = dyn_cast<IntInit>(lhs);
if (Init *I = lhs->convertInitializerTo(IntRecTy::get()))
Value = dyn_cast<IntInit>(I);
@ -1082,7 +1082,7 @@ Init *TernOpInit::resolveReferences(Record &R,
std::string TernOpInit::getAsString() const {
std::string Result;
switch (Opc) {
switch (getOpcode()) {
case SUBST: Result = "!subst"; break;
case FOREACH: Result = "!foreach"; break;
case IF: Result = "!if"; break;

View File

@ -77,7 +77,8 @@ bool TGParser::AddValue(Record *CurRec, SMLoc Loc, const RecordVal &RV) {
/// SetValue -
/// Return true on error, false on success.
bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
const std::vector<unsigned> &BitList, Init *V) {
ArrayRef<unsigned> BitList, Init *V,
bool AllowSelfAssignment) {
if (!V) return false;
if (!CurRec) CurRec = &CurMultiClass->Rec;
@ -91,8 +92,8 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
// in the resolution machinery.
if (BitList.empty())
if (VarInit *VI = dyn_cast<VarInit>(V))
if (VI->getNameInit() == ValName)
return false;
if (VI->getNameInit() == ValName && !AllowSelfAssignment)
return true;
// If we are assigning to a subset of the bits in the value... then we must be
// assigning to a field of BitsRecTy, which must have a BitsInit
@ -165,7 +166,7 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
if (i < SubClass.TemplateArgs.size()) {
// If a value is specified for this template arg, set it now.
if (SetValue(CurRec, SubClass.RefRange.Start, TArgs[i],
std::vector<unsigned>(), SubClass.TemplateArgs[i]))
None, SubClass.TemplateArgs[i]))
return true;
// Resolve it next.
@ -243,8 +244,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
// If a value is specified for this template arg, set it in the
// superclass now.
if (SetValue(CurRec, SubMultiClass.RefRange.Start, SMCTArgs[i],
std::vector<unsigned>(),
SubMultiClass.TemplateArgs[i]))
None, SubMultiClass.TemplateArgs[i]))
return true;
// Resolve it next.
@ -258,8 +258,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
for (const auto &Def :
makeArrayRef(CurMC->DefPrototypes).slice(newDefStart)) {
if (SetValue(Def.get(), SubMultiClass.RefRange.Start, SMCTArgs[i],
std::vector<unsigned>(),
SubMultiClass.TemplateArgs[i]))
None, SubMultiClass.TemplateArgs[i]))
return true;
// Resolve it next.
@ -332,8 +331,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){
IterRec->addValue(RecordVal(IterVar->getName(), IVal->getType(), false));
if (SetValue(IterRec.get(), Loc, IterVar->getName(),
std::vector<unsigned>(), IVal))
if (SetValue(IterRec.get(), Loc, IterVar->getName(), None, IVal))
return Error(Loc, "when instantiating this def");
// Resolve it next.
@ -1728,7 +1726,7 @@ Init *TGParser::ParseDeclaration(Record *CurRec,
SMLoc ValLoc = Lex.getLoc();
Init *Val = ParseValue(CurRec, Type);
if (!Val ||
SetValue(CurRec, ValLoc, DeclName, std::vector<unsigned>(), Val))
SetValue(CurRec, ValLoc, DeclName, None, Val))
// Return the name, even if an error is thrown. This is so that we can
// continue to make some progress, even without the value having been
// initialized.
@ -2358,8 +2356,8 @@ Record *TGParser::InstantiateMulticlassDef(MultiClass &MC, Record *DefProto,
// Set the value for NAME. We don't resolve references to it 'til later,
// though, so that uses in nested multiclass names don't get
// confused.
if (SetValue(CurRec.get(), Ref.RefRange.Start, "NAME",
std::vector<unsigned>(), DefmPrefix)) {
if (SetValue(CurRec.get(), Ref.RefRange.Start, "NAME", None, DefmPrefix,
/*AllowSelfAssignment*/true)) {
Error(DefmPrefixRange.Start, "Could not resolve " +
CurRec->getNameInitAsString() + ":NAME to '" +
DefmPrefix->getAsUnquotedString() + "'");
@ -2446,8 +2444,7 @@ bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC, Record *CurRec,
// Check if a value is specified for this temp-arg.
if (i < TemplateVals.size()) {
// Set it now.
if (SetValue(CurRec, DefmPrefixLoc, TArgs[i], std::vector<unsigned>(),
TemplateVals[i]))
if (SetValue(CurRec, DefmPrefixLoc, TArgs[i], None, TemplateVals[i]))
return true;
// Resolve it next.

View File

@ -105,10 +105,13 @@ class TGParser {
private: // Semantic analysis methods.
bool AddValue(Record *TheRec, SMLoc Loc, const RecordVal &RV);
bool SetValue(Record *TheRec, SMLoc Loc, Init *ValName,
const std::vector<unsigned> &BitList, Init *V);
ArrayRef<unsigned> BitList, Init *V,
bool AllowSelfAssignment = false);
bool SetValue(Record *TheRec, SMLoc Loc, const std::string &ValName,
const std::vector<unsigned> &BitList, Init *V) {
return SetValue(TheRec, Loc, StringInit::get(ValName), BitList, V);
ArrayRef<unsigned> BitList, Init *V,
bool AllowSelfAssignment = false) {
return SetValue(TheRec, Loc, StringInit::get(ValName), BitList, V,
AllowSelfAssignment);
}
bool AddSubClass(Record *Rec, SubClassReference &SubClass);
bool AddSubMultiClass(MultiClass *CurMC,

View File

@ -124,6 +124,14 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
FeaturePerfMon,
FeatureZCRegMove, FeatureZCZeroing]>;
def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
"Samsung Exynos-M1 processors",
[FeatureFPARMv8,
FeatureNEON,
FeatureCrypto,
FeatureCRC,
FeaturePerfMon]>;
def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
FeatureNEON,
FeatureCRC,
@ -136,6 +144,8 @@ def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
// FIXME: Cortex-A72 is currently modelled as an Cortex-A57.
def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA57]>;
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
// FIXME: Exynos-M1 is currently modelled without a specific SchedModel.
def : ProcessorModel<"exynos-m1", NoSchedModel, [ProcExynosM1]>;
//===----------------------------------------------------------------------===//
// Assembly parser

View File

@ -158,7 +158,7 @@ INITIALIZE_PASS_END(AArch64A57FPLoadBalancing, DEBUG_TYPE,
"AArch64 A57 FP Load-Balancing", false, false)
namespace {
/// A Chain is a sequence of instructions that are linked together by
/// A Chain is a sequence of instructions that are linked together by
/// an accumulation operand. For example:
///
/// fmul d0<def>, ?
@ -285,7 +285,7 @@ class Chain {
std::string str() const {
std::string S;
raw_string_ostream OS(S);
OS << "{";
StartInst->print(OS, /* SkipOpers= */true);
OS << " -> ";
@ -427,7 +427,7 @@ Chain *AArch64A57FPLoadBalancing::getAndEraseNext(Color PreferredColor,
return Ch;
}
}
// Bailout case - just return the first item.
Chain *Ch = L.front();
L.erase(L.begin());
@ -495,7 +495,7 @@ int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C,
RS.enterBasicBlock(&MBB);
RS.forward(MachineBasicBlock::iterator(G->getStart()));
// Can we find an appropriate register that is available throughout the life
// Can we find an appropriate register that is available throughout the life
// of the chain?
unsigned RegClassID = G->getStart()->getDesc().OpInfo[0].RegClass;
BitVector AvailableRegs = RS.getRegsAvailable(TRI->getRegClass(RegClassID));

View File

@ -2426,7 +2426,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
continue;
}
if (VA.isRegLoc()) {
// Arguments stored in registers.
EVT RegVT = VA.getLocVT();
@ -5074,7 +5074,7 @@ static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
// The index of an EXT is the first element if it is not UNDEF.
// Watch out for the beginning UNDEFs. The EXT index should be the expected
// value of the first element. E.g.
// value of the first element. E.g.
// <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
// <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
// ExpectedElt is the last mask index plus 1.
@ -9491,6 +9491,103 @@ static SDValue performBRCONDCombine(SDNode *N,
return SDValue();
}
// Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test
// as well as whether the test should be inverted. This code is required to
// catch these cases (as opposed to standard dag combines) because
// AArch64ISD::TBZ is matched during legalization.
static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
SelectionDAG &DAG) {
if (!Op->hasOneUse())
return Op;
// We don't handle undef/constant-fold cases below, as they should have
// already been taken care of (e.g. and of 0, test of undefined shifted bits,
// etc.)
// (tbz (trunc x), b) -> (tbz x, b)
// This case is just here to enable more of the below cases to be caught.
if (Op->getOpcode() == ISD::TRUNCATE &&
Bit < Op->getValueType(0).getSizeInBits()) {
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
if (Op->getNumOperands() != 2)
return Op;
auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
if (!C)
return Op;
switch (Op->getOpcode()) {
default:
return Op;
// (tbz (and x, m), b) -> (tbz x, b)
case ISD::AND:
if ((C->getZExtValue() >> Bit) & 1)
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
return Op;
// (tbz (shl x, c), b) -> (tbz x, b-c)
case ISD::SHL:
if (C->getZExtValue() <= Bit &&
(Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
Bit = Bit - C->getZExtValue();
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
return Op;
// (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
case ISD::SRA:
Bit = Bit + C->getZExtValue();
if (Bit >= Op->getValueType(0).getSizeInBits())
Bit = Op->getValueType(0).getSizeInBits() - 1;
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
// (tbz (srl x, c), b) -> (tbz x, b+c)
case ISD::SRL:
if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
Bit = Bit + C->getZExtValue();
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
return Op;
// (tbz (xor x, -1), b) -> (tbnz x, b)
case ISD::XOR:
if ((C->getZExtValue() >> Bit) & 1)
Invert = !Invert;
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
}
// Optimize test single bit zero/non-zero and branch.
static SDValue performTBZCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
bool Invert = false;
SDValue TestSrc = N->getOperand(1);
SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);
if (TestSrc == NewTestSrc)
return SDValue();
unsigned NewOpc = N->getOpcode();
if (Invert) {
if (NewOpc == AArch64ISD::TBZ)
NewOpc = AArch64ISD::TBNZ;
else {
assert(NewOpc == AArch64ISD::TBNZ);
NewOpc = AArch64ISD::TBZ;
}
}
SDLoc DL(N);
return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
}
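To make the bit arithmetic concrete: testing bit 3 of (shl x, 2) is testing bit 1 of x, testing bit 1 of (srl x, 2) is testing bit 3 of x, and an SRA that pushes the request past the top bit clamps to the sign bit. A tiny integer-only model of the three shift rules above (64-bit width assumed):

#include <cassert>

unsigned throughShl(unsigned Bit, unsigned C) {
  assert(C <= Bit && "guarded by the SHL case above");
  return Bit - C;
}
unsigned throughSrl(unsigned Bit, unsigned C) {
  assert(Bit + C < 64 && "guarded by the SRL case above");
  return Bit + C;
}
unsigned throughSra(unsigned Bit, unsigned C) {
  // sra replicates the sign bit, so out-of-range requests clamp to the MSB.
  return (Bit + C < 64) ? Bit + C : 63;
}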
// vselect (v1i1 setcc) ->
// vselect (v1iXX setcc) (XX is the size of the compared operand type)
// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
@ -9642,6 +9739,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performSTORECombine(N, DCI, DAG, Subtarget);
case AArch64ISD::BRCOND:
return performBRCONDCombine(N, DCI, DAG);
case AArch64ISD::TBNZ:
case AArch64ISD::TBZ:
return performTBZCombine(N, DCI, DAG);
case AArch64ISD::CSEL:
return performCONDCombine(N, DCI, DAG, 2, 3);
case AArch64ISD::DUP:

View File

@ -613,21 +613,6 @@ static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst,
(UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
// Copy MachineMemOperands from Op0 and Op1 to a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
MachineInstr *Op1) {
assert(MI->memoperands_empty() && "expected a new machineinstr");
size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) +
(Op1->memoperands_end() - Op1->memoperands_begin());
MachineFunction *MF = MI->getParent()->getParent();
MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
MachineSDNode::mmo_iterator MemEnd =
std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
MemEnd = std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
MI->setMemRefs(MemBegin, MemEnd);
}
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
@ -692,10 +677,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
TII->get(NewOpc))
.addOperand(getLdStRegOp(RtNewDest))
.addOperand(BaseRegOp)
.addImm(OffsetImm);
// Copy MachineMemOperands from the original loads.
concatenateMemOperands(NewMemMI, I, Paired);
.addImm(OffsetImm)
.setMemRefs(I->mergeMemRefsWith(*Paired));
DEBUG(
dbgs()
@ -786,9 +769,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
TII->get(NewOpc))
.addOperand(getLdStRegOp(I))
.addOperand(BaseRegOp)
.addImm(OffsetImm);
// Copy MachineMemOperands from the original stores.
concatenateMemOperands(MIB, I, Paired);
.addImm(OffsetImm)
.setMemRefs(I->mergeMemRefsWith(*Paired));
} else {
// Handle Unscaled
if (IsUnscaled)

View File

@ -33,7 +33,14 @@ class Triple;
class AArch64Subtarget : public AArch64GenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {Others, CortexA35, CortexA53, CortexA57, Cyclone};
enum ARMProcFamilyEnum {
Others,
CortexA35,
CortexA53,
CortexA57,
Cyclone,
ExynosM1
};
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
ARMProcFamilyEnum ARMProcFamily;
@ -143,6 +150,7 @@ class AArch64Subtarget : public AArch64GenSubtargetInfo {
bool isCyclone() const { return CPUString == "cyclone"; }
bool isCortexA57() const { return CPUString == "cortex-a57"; }
bool isCortexA53() const { return CPUString == "cortex-a53"; }
bool isExynosM1() const { return CPUString == "exynos-m1"; }
bool useAA() const override { return isCortexA53(); }

View File

@ -834,7 +834,7 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings
};
uint32_t
AArch64SysReg::SysRegMapper::fromString(StringRef Name,
AArch64SysReg::SysRegMapper::fromString(StringRef Name,
const FeatureBitset& FeatureBits, bool &Valid) const {
std::string NameLower = Name.lower();
@ -878,7 +878,7 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name,
}
std::string
AArch64SysReg::SysRegMapper::toString(uint32_t Bits,
AArch64SysReg::SysRegMapper::toString(uint32_t Bits,
const FeatureBitset& FeatureBits) const {
// First search the registers shared by all
for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {

View File

@ -285,17 +285,17 @@ struct AArch64NamedImmMapper {
// Zero value of FeatureBitSet means the mapping is always available
FeatureBitset FeatureBitSet;
bool isNameEqual(std::string Other,
bool isNameEqual(std::string Other,
const FeatureBitset& FeatureBits) const {
if (FeatureBitSet.any() &&
if (FeatureBitSet.any() &&
(FeatureBitSet & FeatureBits).none())
return false;
return Name == Other;
}
bool isValueEqual(uint32_t Other,
bool isValueEqual(uint32_t Other,
const FeatureBitset& FeatureBits) const {
if (FeatureBitSet.any() &&
if (FeatureBitSet.any() &&
(FeatureBitSet & FeatureBits).none())
return false;
return Value == Other;
@ -310,7 +310,7 @@ struct AArch64NamedImmMapper {
StringRef toString(uint32_t Value, const FeatureBitset& FeatureBits,
bool &Valid) const;
// Maps string to value, depending on availability for FeatureBits given
uint32_t fromString(StringRef Name, const FeatureBitset& FeatureBits,
uint32_t fromString(StringRef Name, const FeatureBitset& FeatureBits,
bool &Valid) const;
/// Many of the instructions allow an alternative assembly form consisting of
@ -1322,7 +1322,7 @@ namespace AArch64TLBI {
return true;
}
}
}
}
namespace AArch64II {
/// Target Operand Flag enum.

View File

@ -118,6 +118,11 @@ def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
"true",
"Support flat address space">;
def FeatureXNACK : SubtargetFeature<"xnack",
"EnableXNACK",
"true",
"Enable XNACK support">;
def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
"EnableVGPRSpilling",
"true",

View File

@ -417,13 +417,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
}
}
if (VCCUsed || FlatUsed)
if (VCCUsed || FlatUsed || STM.isXNACKEnabled()) {
MaxSGPR += 2;
if (FlatUsed) {
MaxSGPR += 2;
// 2 additional for VI+.
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
if (FlatUsed)
MaxSGPR += 2;
if (STM.isXNACKEnabled())
MaxSGPR += 2;
}
@ -620,6 +620,9 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
if (MFI->hasDispatchPtr())
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (STM.isXNACKEnabled())
header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
header.kernarg_segment_byte_size = MFI->ABIArgOffset;
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;

View File

@ -204,14 +204,6 @@ def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
def az_extloadi8_flat : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
return isFlatLoad(dyn_cast<LoadSDNode>(N));
}]>;
def sextloadi8_flat : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
return isFlatLoad(dyn_cast<LoadSDNode>(N));
}]>;
def az_extloadi8_constant : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
@ -243,14 +235,6 @@ def sextloadi16_global : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
def az_extloadi16_flat : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
return isFlatLoad(dyn_cast<LoadSDNode>(N));
}]>;
def sextloadi16_flat : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
return isFlatLoad(dyn_cast<LoadSDNode>(N));
}]>;
def az_extloadi16_constant : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
@ -299,16 +283,6 @@ def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr),
return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;
def truncstorei8_flat : PatFrag<(ops node:$val, node:$ptr),
(truncstorei8 node:$val, node:$ptr), [{
return isFlatStore(dyn_cast<StoreSDNode>(N));
}]>;
def truncstorei16_flat : PatFrag<(ops node:$val, node:$ptr),
(truncstorei16 node:$val, node:$ptr), [{
return isFlatStore(dyn_cast<StoreSDNode>(N));
}]>;
def local_store : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return isLocalStore(dyn_cast<StoreSDNode>(N));
@ -385,15 +359,6 @@ multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
defm atomic_cmp_swap : AtomicCmpSwapLocal <atomic_cmp_swap>;
def flat_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return isFlatLoad(dyn_cast<LoadSDNode>(N));
}]>;
def flat_store : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return isFlatStore(dyn_cast<StoreSDNode>(N));
}]>;
def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;

View File

@ -73,6 +73,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
CaymanISA(false), FlatAddressSpace(false), FlatForGlobal(false),
EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true),
EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false),
EnableXNACK(false),
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),

View File

@ -76,6 +76,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
bool EnableIfCvt;
bool EnableLoadStoreOpt;
bool EnableUnsafeDSOffsetFolding;
bool EnableXNACK;
unsigned WavefrontSize;
bool CFALUBug;
int LocalMemorySize;
@ -290,6 +291,10 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
}
bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const;
bool isXNACKEnabled() const {
return EnableXNACK;
}
unsigned getMaxWavesPerCU() const {
if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
return 10;

View File

@ -264,42 +264,6 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <
} // End let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst
//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//
let Predicates = [HasFlatAddressSpace] in {
class FLATLoad_Pattern <FLAT Instr_ADDR64, ValueType vt,
PatFrag flat_ld> :
Pat <(vt (flat_ld i64:$ptr)),
(Instr_ADDR64 $ptr, 0, 0, 0)
>;
def : FLATLoad_Pattern <FLAT_LOAD_SBYTE, i32, sextloadi8_flat>;
def : FLATLoad_Pattern <FLAT_LOAD_UBYTE, i32, az_extloadi8_flat>;
def : FLATLoad_Pattern <FLAT_LOAD_SSHORT, i32, sextloadi16_flat>;
def : FLATLoad_Pattern <FLAT_LOAD_USHORT, i32, az_extloadi16_flat>;
def : FLATLoad_Pattern <FLAT_LOAD_DWORD, i32, flat_load>;
def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, flat_load>;
def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, i64, az_extloadi32_flat>;
def : FLATLoad_Pattern <FLAT_LOAD_DWORDX2, v2i32, flat_load>;
def : FLATLoad_Pattern <FLAT_LOAD_DWORDX4, v4i32, flat_load>;
class FLATStore_Pattern <FLAT Instr, ValueType vt, PatFrag st> :
Pat <(st vt:$value, i64:$ptr),
(Instr $value, $ptr, 0, 0, 0)
>;
def : FLATStore_Pattern <FLAT_STORE_BYTE, i32, truncstorei8_flat>;
def : FLATStore_Pattern <FLAT_STORE_SHORT, i32, truncstorei16_flat>;
def : FLATStore_Pattern <FLAT_STORE_DWORD, i32, flat_store>;
def : FLATStore_Pattern <FLAT_STORE_DWORDX2, i64, flat_store>;
def : FLATStore_Pattern <FLAT_STORE_DWORDX2, v2i32, flat_store>;
def : FLATStore_Pattern <FLAT_STORE_DWORDX4, v4i32, flat_store>;
} // End HasFlatAddressSpace predicate
let Predicates = [isCI] in {
// Convert (x - floor(x)) to fract(x)
@ -320,20 +284,10 @@ def : Pat <
//===----------------------------------------------------------------------===//
// Patterns to generate flat for global
// Flat Patterns
//===----------------------------------------------------------------------===//
def useFlatForGlobal : Predicate <
"Subtarget->useFlatForGlobal() || "
"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">;
let Predicates = [useFlatForGlobal] in {
// 1. Offset as 20bit DWORD immediate
def : Pat <
(SIload_constant v4i32:$sbase, IMM20bit:$offset),
(S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
>;
let Predicates = [isCIVI] in {
// Patterns for global loads with no offset
class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
@ -341,24 +295,24 @@ class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(inst $addr, 0, 0, 0)
>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_global, i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_global, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_global, i32>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_global, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORD, global_load, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, global_load, v2i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX4, global_load, v4i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, flat_sextloadi16, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load, v4i32>;
class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(node vt:$data, i64:$addr),
(inst $data, $addr, 0, 0, 0)
>;
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_global, i32>;
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_global, i32>;
def : FlatStorePat <FLAT_STORE_DWORD, global_store, i32>;
def : FlatStorePat <FLAT_STORE_DWORDX2, global_store, v2i32>;
def : FlatStorePat <FLAT_STORE_DWORDX4, global_store, v4i32>;
def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>;
def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>;
def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>;
def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;
class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(vt (node i64:$addr, vt:$data)),
@ -376,4 +330,4 @@ def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
} // End Predicates = [useFlatForGlobal]
} // End Predicates = [isCIVI]

View File

@ -105,51 +105,53 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
MBB.addLiveIn(PreloadedPrivateBufferReg);
}
// We reserved the last registers for this. Shift it down to the end of those
// which were actually used.
//
// FIXME: It might be safer to use a pseudoregister before replacement.
if (!ST.hasSGPRInitBug()) {
// We reserved the last registers for this. Shift it down to the end of those
// which were actually used.
//
// FIXME: It might be safer to use a pseudoregister before replacement.
// FIXME: We should be able to eliminate unused input registers. We only
// cannot do this for the resources required for scratch access. For now we
// skip over user SGPRs and may leave unused holes.
// FIXME: We should be able to eliminate unused input registers. We only
// cannot do this for the resources required for scratch access. For now we
// skip over user SGPRs and may leave unused holes.
// We find the resource first because it has an alignment requirement.
if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) {
MachineRegisterInfo &MRI = MF.getRegInfo();
// We find the resource first because it has an alignment requirement.
if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) {
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4;
// Skip the last 2 elements because the last one is reserved for VCC, and
// this is the 2nd to last element already.
for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) {
// Pick the first unallocated one. Make sure we don't clobber the other
// reserved input we needed.
if (!MRI.isPhysRegUsed(Reg)) {
assert(MRI.isAllocatable(Reg));
MRI.replaceRegWith(ScratchRsrcReg, Reg);
ScratchRsrcReg = Reg;
MFI->setScratchRSrcReg(ScratchRsrcReg);
break;
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4;
// Skip the last 2 elements because the last one is reserved for VCC, and
// this is the 2nd to last element already.
for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) {
// Pick the first unallocated one. Make sure we don't clobber the other
// reserved input we needed.
if (!MRI.isPhysRegUsed(Reg)) {
assert(MRI.isAllocatable(Reg));
MRI.replaceRegWith(ScratchRsrcReg, Reg);
ScratchRsrcReg = Reg;
MFI->setScratchRSrcReg(ScratchRsrcReg);
break;
}
}
}
}
if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) {
MachineRegisterInfo &MRI = MF.getRegInfo();
// Skip the last 2 elements because the last one is reserved for VCC, and
// this is the 2nd to last element already.
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) {
// Pick the first unallocated SGPR. Be careful not to pick an alias of the
// scratch descriptor, since we haven't added its uses yet.
if (!MRI.isPhysRegUsed(Reg)) {
assert(MRI.isAllocatable(Reg) &&
!TRI->isSubRegisterEq(ScratchRsrcReg, Reg));
if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) {
MachineRegisterInfo &MRI = MF.getRegInfo();
// Skip the last 2 elements because the last one is reserved for VCC, and
// this is the 2nd to last element already.
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) {
// Pick the first unallocated SGPR. Be careful not to pick an alias of the
// scratch descriptor, since we haven't added its uses yet.
if (!MRI.isPhysRegUsed(Reg)) {
assert(MRI.isAllocatable(Reg) &&
!TRI->isSubRegisterEq(ScratchRsrcReg, Reg));
MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
ScratchWaveOffsetReg = Reg;
MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
break;
MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
ScratchWaveOffsetReg = Reg;
MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
break;
}
}
}
}

View File

@ -134,6 +134,34 @@ def SIconstdata_ptr : SDNode<
SDTCisVT<0, i64>]>
>;
//===----------------------------------------------------------------------===//
// PatFrags for FLAT instructions
//===----------------------------------------------------------------------===//
class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr),
(ld node:$ptr), [{
return isFlatLoad(dyn_cast<LoadSDNode>(N)) ||
isGlobalLoad(dyn_cast<LoadSDNode>(N)) ||
isConstantLoad(cast<LoadSDNode>(N), -1);
}]>;
def flat_load : flat_ld <load>;
def flat_az_extloadi8 : flat_ld <az_extloadi8>;
def flat_sextloadi8 : flat_ld <sextloadi8>;
def flat_az_extloadi16 : flat_ld <az_extloadi16>;
def flat_sextloadi16 : flat_ld <sextloadi16>;
class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr),
(st node:$val, node:$ptr), [{
return isFlatStore(dyn_cast<StoreSDNode>(N)) ||
isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;
def flat_store: flat_st <store>;
def flat_truncstorei8 : flat_st <truncstorei8>;
def flat_truncstorei16 : flat_st <truncstorei16>;
def mubuf_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
return isGlobalLoad(cast<LoadSDNode>(N)) ||
isConstantLoad(cast<LoadSDNode>(N), -1);

View File

@ -59,8 +59,6 @@ defm EXP : EXP_m;
// SMRD Instructions
//===----------------------------------------------------------------------===//
let mayLoad = 1 in {
// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
// SMRD instructions, because the SGPR_32 register class does not include M0
// and writing to M0 from an SMRD instruction will hang the GPU.
@ -90,8 +88,6 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
smrd<0x0c>, "s_buffer_load_dwordx16", SReg_128, SReg_512
>;
} // mayLoad = 1
//def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>;
defm S_DCACHE_INV : SMRD_Inval <smrd<0x1f, 0x20>, "s_dcache_inv",

View File

@ -156,6 +156,17 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
if (!LaneVGPRs.count(LaneVGPRIdx)) {
unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
if (LaneVGPR == AMDGPU::NoRegister) {
LLVMContext &Ctx = MF->getFunction()->getContext();
Ctx.emitError("Ran out of VGPRs for spilling SGPR");
// When compiling from inside Mesa, the compilation continues.
// Select an arbitrary register to avoid triggering assertions
// during subsequent passes.
LaneVGPR = AMDGPU::VGPR0;
}
LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
// Add this register as live-in to all blocks to avoid machine verifier

View File

@ -37,13 +37,17 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
if (ST.hasSGPRInitBug()) {
unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4;
if (ST.isXNACKEnabled())
BaseIdx -= 4;
unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
// 98/99 need to be reserved for flat_scr, and 100/101 for vcc. This is the
// next sgpr128 down.
// 98/99 need to be reserved for flat_scr or 96/97 for flat_scr and
// 98/99 for xnack_mask, and 100/101 for vcc. This is the next sgpr128 down
// either way.
return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95;
}
@ -54,13 +58,25 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
const MachineFunction &MF) const {
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
if (ST.hasSGPRInitBug()) {
unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5;
unsigned Idx;
if (!ST.isXNACKEnabled())
Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5;
else
Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1;
return AMDGPU::SGPR_32RegClass.getRegister(Idx);
}
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
// Next register before reservations for flat_scr and vcc.
return AMDGPU::SGPR97;
if (!ST.isXNACKEnabled()) {
// Next register before reservations for flat_scr and vcc.
return AMDGPU::SGPR97;
} else {
// Next register before reservations for flat_scr, xnack_mask, vcc,
// and scratch resource.
return AMDGPU::SGPR91;
}
}
return AMDGPU::SGPR95;
@ -86,6 +102,9 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// for VCC/FLAT_SCR.
reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99);
reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101);
if (ST.isXNACKEnabled())
reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97);
}
// Tonga and Iceland can only allocate a fixed number of SGPRs due
@ -93,9 +112,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (ST.hasSGPRInitBug()) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
// Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
// Assume XNACK_MASK is unused.
unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
if (ST.isXNACKEnabled())
Limit -= 2;
for (unsigned i = Limit; i < NumSGPRs; ++i) {
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
reserveRegisterTuples(Reserved, Reg);
@ -282,11 +303,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
struct SIMachineFunctionInfo::SpilledReg Spill =
MFI->getSpilledReg(MF, Index, i);
if (Spill.VGPR == AMDGPU::NoRegister) {
LLVMContext &Ctx = MF->getFunction()->getContext();
Ctx.emitError("Ran out of VGPRs for spilling SGPR");
}
BuildMI(*MBB, MI, DL,
TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
Spill.VGPR)
@ -315,11 +331,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
struct SIMachineFunctionInfo::SpilledReg Spill =
MFI->getSpilledReg(MF, Index, i);
if (Spill.VGPR == AMDGPU::NoRegister) {
LLVMContext &Ctx = MF->getFunction()->getContext();
Ctx.emitError("Ran out of VGPRs for spilling SGPR");
}
BuildMI(*MBB, MI, DL,
TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
SubReg)

View File

@ -101,3 +101,12 @@ def S_DCACHE_WB_VOL : SMEM_Inval <0x23,
} // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
let Predicates = [isVI] in {
// 1. Offset as 20bit DWORD immediate
def : Pat <
(SIload_constant v4i32:$sbase, IMM20bit:$offset),
(S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
>;
} // End Predicates = [isVI]

View File

@ -252,6 +252,8 @@ def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
"Swift ARM processors", []>;
def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
"Samsung Exynos-M1 processors", []>;
def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
"Cortex-R4 ARM processors", []>;
@ -649,6 +651,12 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureCrypto,
FeatureZCZeroing]>;
def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1,
FeatureHWDiv,
FeatureHWDivARM,
FeatureT2XtPk,
FeatureCrypto,
FeatureCRC]>;
//===----------------------------------------------------------------------===//
// Register File Description

View File

@ -340,12 +340,12 @@ namespace {
/// verify - check BBOffsets, BBSizes, alignment of islands
void ARMConstantIslands::verify() {
#ifndef NDEBUG
for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = &*MBBI;
unsigned MBBId = MBB->getNumber();
assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
}
assert(std::is_sorted(MF->begin(), MF->end(),
[this](const MachineBasicBlock &LHS,
const MachineBasicBlock &RHS) {
return BBInfo[LHS.getNumber()].postOffset() <
BBInfo[RHS.getNumber()].postOffset();
}));
DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
CPUser &U = CPUsers[i];

View File

@ -1986,23 +1986,6 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
return AddedRegPressure.size() <= MemRegs.size() * 2;
}
/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
MachineInstr *Op1) {
assert(MI->memoperands_empty() && "expected a new machineinstr");
size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
+ (Op1->memoperands_end() - Op1->memoperands_begin());
MachineFunction *MF = MI->getParent()->getParent();
MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
MachineSDNode::mmo_iterator MemEnd =
std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
MemEnd =
std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
MI->setMemRefs(MemBegin, MemEnd);
}
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
DebugLoc &dl, unsigned &NewOpc,
@ -2196,7 +2179,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (!isT2)
MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
concatenateMemOperands(MIB, Op0, Op1);
MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
DEBUG(dbgs() << "Formed " << *MIB << "\n");
++NumLDRDFormed;
} else {
@ -2210,7 +2193,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (!isT2)
MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
concatenateMemOperands(MIB, Op0, Op1);
MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
DEBUG(dbgs() << "Formed " << *MIB << "\n");
++NumSTRDFormed;
}

View File

@ -44,7 +44,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
enum ARMProcFamilyEnum {
Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexA35, CortexA53,
CortexA57, CortexA72, Krait, Swift
CortexA57, CortexA72, Krait, Swift, ExynosM1
};
enum ARMProcClassEnum {
None, AClass, RClass, MClass

View File

@ -251,6 +251,10 @@ def : Proc<"hexagonv60", HexagonModelV60,
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//
def HexagonAsmParser : AsmParser {
bit HasMnemonicFirst = 0;
}
def HexagonAsmParserVariant : AsmParserVariant {
int Variant = 0;
string TokenizingCharacters = "#()=:.<>!+*";
@ -259,5 +263,6 @@ def HexagonAsmParserVariant : AsmParserVariant {
def Hexagon : Target {
// Pull in Instruction Info:
let InstructionSet = HexagonInstrInfo;
let AssemblyParsers = [HexagonAsmParser];
let AssemblyParserVariants = [HexagonAsmParserVariant];
}

View File

@ -5807,3 +5807,5 @@ include "HexagonInstrInfoV60.td"
include "HexagonInstrInfoVector.td"
include "HexagonInstrAlias.td"
include "HexagonSystemInst.td"

View File

@ -0,0 +1,113 @@
//==- HexagonSystemInst.td - System Instructions for Hexagon -*- tablegen -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the Hexagon instructions in TableGen format.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Cache manipulation instructions.
//===----------------------------------------------------------------------===//
let mayStore = 1 in
class ST_MISC_CACHEOP<dag outs, dag ins,
string asmstr, list<dag> pattern = [],
bits<3> amode, bits<3> type, bits<1> un>
: ST0Inst<outs, ins, asmstr, pattern, "", ST_tc_ld_SLOT0> {
bits<5> Rs;
bits<5> Rt;
bits<5> Rd;
let Inst{31-28} = 0b1010;
let Inst{27-25} = amode;
let Inst{24-22} = type;
let Inst{21} = un;
let Inst{20-16} = Rs;
let Inst{12-8} = Rt;
let Inst{4-0} = Rd;
}
let mayStore = 1 in
class ST_MISC_CACHEOP_SYS<dag outs, dag ins,
string asmstr, list<dag> pattern = [],
bits<3> amode, bits<3> type, bits<1> un>
: SYSInst<outs, ins, asmstr, pattern, ""> {
bits<5> Rs;
bits<5> Rt;
bits<5> Rd;
let Inst{31-28} = 0b1010;
let Inst{27-25} = amode;
let Inst{24-22} = type;
let Inst{21} = un;
let Inst{20-16} = Rs;
let Inst{12-8} = Rt;
let Inst{4-0} = Rd;
}
let isSolo = 1, Rs = 0, Rt = 0, Rd = 0 in {
def Y2_syncht: ST_MISC_CACHEOP <(outs), (ins),
"syncht" , [], 0b100, 0b001, 0b0>;
}
let Rt = 0, Rd = 0 in {
let isSoloAin1 = 1 in {
def Y2_dccleana: ST_MISC_CACHEOP <(outs), (ins IntRegs:$Rs),
"dccleana($Rs)", [], 0b000, 0b000, 0b0>;
def Y2_dcinva: ST_MISC_CACHEOP <(outs), (ins IntRegs:$Rs),
"dcinva($Rs)", [], 0b000, 0b000, 0b1>;
def Y2_dccleaninva: ST_MISC_CACHEOP <(outs), (ins IntRegs:$Rs),
"dccleaninva($Rs)", [], 0b000, 0b001, 0b0>;
}
}
let isSoloAX = 1, hasSideEffects = 1, Rd = 0 in {
def Y4_l2fetch: ST_MISC_CACHEOP_SYS<(outs), (ins IntRegs:$Rs, IntRegs:$Rt),
"l2fetch($Rs, $Rt)", [], 0b011, 0b000, 0b0>;
def Y5_l2fetch: ST_MISC_CACHEOP_SYS<(outs), (ins IntRegs:$Rs, DoubleRegs:$Rt),
"l2fetch($Rs, $Rt)", [], 0b011, 0b010, 0b0>;
}
let hasSideEffects = 0, isSolo = 1 in
class Y2_INVALIDATE_CACHE<string mnemonic, bit MajOp>
: JRInst <
(outs), (ins IntRegs:$Rs),
#mnemonic#"($Rs)" > {
bits<5> Rs;
let IClass = 0b0101;
let Inst{27-21} = 0b0110110;
let Inst{20-16} = Rs;
let Inst{13-12} = 0b00;
let Inst{11} = MajOp;
}
// Instruction cache invalidate
def Y2_icinva : Y2_INVALIDATE_CACHE<"icinva", 0b0>;
// Zero an aligned 32-byte cacheline.
let isSoloAin1 = 1 in
def Y2_dczeroa: ST0Inst <(outs), (ins IntRegs:$Rs),
"dczeroa($Rs)"> {
bits<5> Rs;
let IClass = 0b1010;
let Inst{27-21} = 0b0000110;
let Inst{13} = 0b0;
let Inst{20-16} = Rs;
}
// Memory synchronization.
let hasSideEffects = 0, isSolo = 1 in
def Y2_isync: JRInst <(outs), (ins),
"isync"> {
let IClass = 0b0101;
let Inst{27-16} = 0b011111000000;
let Inst{13} = 0b0;
let Inst{9-0} = 0b0000000010;
}

View File

@ -5,6 +5,23 @@
pr38151.c
va-arg-22.c
# WebAssemblyRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator, int, unsigned int, llvm::RegScavenger *) const: Assertion `MI.getOperand(1).getImm() == 0 && "Can't eliminate FI yet if offset is already set"'
20030313-1.c
20030916-1.c
20031012-1.c
20041126-1.c
20060420-1.c
20071202-1.c
20120808-1.c
pr20527-1.c
pr27073.c
pr36339.c
pr37573.c
pr43236.c
pr43835.c
pr45070.c
pr51933.c
# TargetRegisterInfo.h:315: static unsigned int llvm::TargetRegisterInfo::virtReg2Index(unsigned int): Assertion `isVirtualRegister(Reg) && "Not a virtual register"' failed.
struct-ret-1.c
va-arg-11.c

View File

@ -32,7 +32,6 @@ static unsigned getVectorRegSize(unsigned RegNo) {
return 64;
llvm_unreachable("Unknown vector reg!");
return 0;
}
static MVT getRegOperandVectorVT(const MCInst *MI, const MVT &ScalarVT,

View File

@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "X86ShuffleDecode.h"
#include "llvm/IR/Constants.h"
#include "llvm/CodeGen/MachineValueType.h"
//===----------------------------------------------------------------------===//
@ -296,54 +295,6 @@ void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
}
}
void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
// It is not an error for the PSHUFB mask to not be a vector of i8 because the
// constant pool uniques constants by their bit representation.
// e.g. the following take up the same space in the constant pool:
// i128 -170141183420855150465331762880109871104
//
// <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
//
// <4 x i32> <i32 -2147483648, i32 -2147483648,
// i32 -2147483648, i32 -2147483648>
#ifndef NDEBUG
unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
assert(MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512);
#endif
// This is a straightforward byte vector.
if (MaskTy->isVectorTy() && MaskTy->getVectorElementType()->isIntegerTy(8)) {
int NumElements = MaskTy->getVectorNumElements();
ShuffleMask.reserve(NumElements);
for (int i = 0; i < NumElements; ++i) {
// For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
// lane of the vector we're inside.
int Base = i & ~0xf;
Constant *COp = C->getAggregateElement(i);
if (!COp) {
ShuffleMask.clear();
return;
} else if (isa<UndefValue>(COp)) {
ShuffleMask.push_back(SM_SentinelUndef);
continue;
}
uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
// If the high bit (7) of the byte is set, the element is zeroed.
if (Element & (1 << 7))
ShuffleMask.push_back(SM_SentinelZero);
else {
// Only the least significant 4 bits of the byte are used.
int Index = Base + (Element & 0xf);
ShuffleMask.push_back(Index);
}
}
}
// TODO: Handle funny-looking vectors too.
}
void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
SmallVectorImpl<int> &ShuffleMask) {
for (int i = 0, e = RawMask.size(); i < e; ++i) {
@ -388,68 +339,6 @@ void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
}
}
void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
// It is not an error for the PSHUFB mask to not be a vector of i8 because the
// constant pool uniques constants by their bit representation.
// e.g. the following take up the same space in the constant pool:
// i128 -170141183420855150465331762880109871104
//
// <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
//
// <4 x i32> <i32 -2147483648, i32 -2147483648,
// i32 -2147483648, i32 -2147483648>
unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512.
return;
// Only support vector types.
if (!MaskTy->isVectorTy())
return;
// Make sure it's an integer type.
Type *VecEltTy = MaskTy->getVectorElementType();
if (!VecEltTy->isIntegerTy())
return;
// Support any element type from byte up to element size.
// This is necessary primarily because 64-bit elements get split to 32-bit
// in the constant pool on 32-bit targets.
unsigned EltTySize = VecEltTy->getIntegerBitWidth();
if (EltTySize < 8 || EltTySize > ElSize)
return;
unsigned NumElements = MaskTySize / ElSize;
assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
"Unexpected number of vector elements.");
ShuffleMask.reserve(NumElements);
unsigned NumElementsPerLane = 128 / ElSize;
unsigned Factor = ElSize / EltTySize;
for (unsigned i = 0; i < NumElements; ++i) {
Constant *COp = C->getAggregateElement(i * Factor);
if (!COp) {
ShuffleMask.clear();
return;
} else if (isa<UndefValue>(COp)) {
ShuffleMask.push_back(SM_SentinelUndef);
continue;
}
int Index = i & ~(NumElementsPerLane - 1);
uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
if (ElSize == 64)
Index += (Element >> 1) & 0x1;
else
Index += Element & 0x3;
ShuffleMask.push_back(Index);
}
// TODO: Handle funny-looking vectors too.
}
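The index arithmetic above keeps every output inside its 128-bit lane; a standalone sketch of just that computation (illustrative names):

#include <cstdint>
// VPERMILP index math from the loop above: 64-bit elements use bit 1
// of the control, 32-bit elements use bits 0-1, and the lane base
// (a multiple of 128/ElSize) is preserved.
int vpermilpIndex(unsigned i, unsigned ElSize, uint64_t Element) {
  unsigned NumElementsPerLane = 128 / ElSize;
  int Index = i & ~(NumElementsPerLane - 1);  // 128-bit lane base
  if (ElSize == 64)
    Index += (Element >> 1) & 0x1;
  else
    Index += Element & 0x3;
  return Index;
}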
void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) {
unsigned NumDstElts = DstVT.getVectorNumElements();
unsigned SrcScalarBits = SrcVT.getScalarSizeInBits();
@@ -572,58 +461,4 @@ void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
}
}
void DecodeVPERMVMask(const Constant *C, MVT VT,
SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
if (MaskTy->isVectorTy()) {
unsigned NumElements = MaskTy->getVectorNumElements();
if (NumElements == VT.getVectorNumElements()) {
for (unsigned i = 0; i < NumElements; ++i) {
Constant *COp = C->getAggregateElement(i);
if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) {
ShuffleMask.clear();
return;
}
if (isa<UndefValue>(COp))
ShuffleMask.push_back(SM_SentinelUndef);
else {
uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
Element &= (1 << NumElements) - 1;
ShuffleMask.push_back(Element);
}
}
}
return;
}
// Scalar value; just broadcast it
if (!isa<ConstantInt>(C))
return;
uint64_t Element = cast<ConstantInt>(C)->getZExtValue();
int NumElements = VT.getVectorNumElements();
Element &= (1 << NumElements) - 1;
for (int i = 0; i < NumElements; ++i)
ShuffleMask.push_back(Element);
}
void DecodeVPERMV3Mask(const Constant *C, MVT VT,
SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
unsigned NumElements = MaskTy->getVectorNumElements();
if (NumElements == VT.getVectorNumElements()) {
for (unsigned i = 0; i < NumElements; ++i) {
Constant *COp = C->getAggregateElement(i);
if (!COp) {
ShuffleMask.clear();
return;
}
if (isa<UndefValue>(COp))
ShuffleMask.push_back(SM_SentinelUndef);
else {
uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
Element &= (1 << NumElements*2) - 1;
ShuffleMask.push_back(Element);
}
}
}
}
} // llvm namespace


@@ -23,7 +23,6 @@
//===----------------------------------------------------------------------===//
namespace llvm {
class Constant;
class MVT;
enum { SM_SentinelUndef = -1, SM_SentinelZero = -2 };
@@ -72,9 +71,6 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
/// different datatypes and vector widths.
void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a PSHUFB mask from an IR-level vector constant.
void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a PSHUFB mask from a raw array of constants such as from
/// BUILD_VECTOR.
void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
@@ -95,10 +91,6 @@ void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm,
/// No VT provided since it only works on 256-bit, 4 element vectors.
void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a VPERMILP variable mask from an IR-level vector constant.
void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a zero extension instruction as a shuffle mask.
void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT,
SmallVectorImpl<int> &ShuffleMask);
@@ -118,18 +110,10 @@ void DecodeEXTRQIMask(int Len, int Idx,
void DecodeINSERTQIMask(int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant.
void DecodeVPERMVMask(const Constant *C, MVT VT,
SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants.
void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant.
void DecodeVPERMV3Mask(const Constant *C, MVT VT,
SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a VPERMT2 W/D/Q/PS/PD mask from a raw array of constants.
void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
SmallVectorImpl<int> &ShuffleMask);


@@ -1098,9 +1098,9 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
RetRegs.push_back(VA.getLocReg());
}
// All x86 ABIs require that for returning structs by value we copy
// the sret argument into %rax/%eax (depending on ABI) for the return.
// We saved the argument into a virtual register in the entry block,
// All x86 ABIs require that for returning structs by value we copy
// the sret argument into %rax/%eax (depending on ABI) for the return.
// We saved the argument into a virtual register in the entry block,
// so now we copy the value out and into %rax/%eax.
if (F.hasStructRetAttr()) {
unsigned Reg = X86MFInfo->getSRetReturnReg();


@@ -78,27 +78,6 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}
/// usesTheStack - This function checks if any of the users of EFLAGS
/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has
/// to use the stack, and if we don't adjust the stack we clobber the first
/// frame index.
/// See X86InstrInfo::copyPhysReg.
static bool usesTheStack(const MachineFunction &MF) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
// Conservatively assume that inline assembly might use the stack.
if (MF.hasInlineAsm())
return true;
return any_of(MRI.reg_instructions(X86::EFLAGS),
[](const MachineInstr &RI) { return RI.isCopy(); });
}
static bool doesStackUseImplyFP(const MachineFunction &MF) {
bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
return IsWin64Prologue && usesTheStack(MF);
}
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
@@ -112,8 +91,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
MMI.callsUnwindInit() || MMI.hasEHFunclets() || MMI.callsEHReturn() ||
MFI->hasStackMap() || MFI->hasPatchPoint() ||
doesStackUseImplyFP(MF));
MFI->hasStackMap() || MFI->hasPatchPoint());
}
static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
@@ -965,11 +943,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// push and pop from the stack.
if (Is64Bit && !Fn->hasFnAttribute(Attribute::NoRedZone) &&
!TRI->needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
!IsWin64CC && // Win64 has no Red Zone
!usesTheStack(MF) && // Don't push and pop.
!MF.shouldSplitStack()) { // Regular stack
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
!IsWin64CC && // Win64 has no Red Zone
!MFI->hasOpaqueSPAdjustment() && // Don't push and pop.
!MF.shouldSplitStack()) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);


@@ -157,13 +157,9 @@ namespace {
/// performance.
bool OptForSize;
/// If true, selector should try to optimize for minimum code size.
bool OptForMinSize;
public:
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(tm, OptLevel), OptForSize(false),
OptForMinSize(false) {}
: SelectionDAGISel(tm, OptLevel), OptForSize(false) {}
const char *getPassName() const override {
return "X86 DAG->DAG Instruction Selection";
@@ -535,10 +531,8 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
}
void X86DAGToDAGISel::PreprocessISelDAG() {
// OptFor[Min]Size are used in pattern predicates that isel is matching.
// OptForSize is used in pattern predicates that isel is matching.
OptForSize = MF->getFunction()->optForSize();
OptForMinSize = MF->getFunction()->optForMinSize();
assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {


@@ -18,6 +18,7 @@
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/SmallBitVector.h"
@@ -4556,6 +4557,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
SDValue Mask = DAG.getConstant(0x0f, dl, MVT::i8);
Result = DAG.getBitcast(CastVT, Result);
Vec256 = DAG.getBitcast(CastVT, Vec256);
Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask);
return DAG.getBitcast(ResultVT, Vec256);
@@ -4851,8 +4853,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
DecodePSHUFBMask(C, Mask);
if (Mask.empty())
return false;
break;
}
@@ -4870,7 +4870,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
case X86ISD::VPERM2X128:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
if (Mask.empty()) return false;
// Mask only contains negative index if an element is zero.
if (std::any_of(Mask.begin(), Mask.end(),
[](int M){ return M == SM_SentinelZero; }))
@@ -4948,8 +4947,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
DecodeVPERMVMask(C, VT, Mask);
if (Mask.empty())
return false;
break;
}
return false;
@@ -5000,8 +4997,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
DecodeVPERMV3Mask(C, VT, Mask);
if (Mask.empty())
return false;
break;
}
return false;
@@ -5009,6 +5004,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
default: llvm_unreachable("unknown target shuffle node");
}
// Empty mask indicates the decode failed.
if (Mask.empty())
return false;
// If we have a fake unary shuffle, the shuffle mask is spread across two
// inputs that are actually the same node. Re-map the mask to always point
// into the first input.
@@ -17372,6 +17371,18 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
if (!IntrData) {
if (IntNo == llvm::Intrinsic::x86_seh_ehregnode)
return MarkEHRegistrationNode(Op, DAG);
if (IntNo == llvm::Intrinsic::x86_flags_read_u32 ||
IntNo == llvm::Intrinsic::x86_flags_read_u64 ||
IntNo == llvm::Intrinsic::x86_flags_write_u32 ||
IntNo == llvm::Intrinsic::x86_flags_write_u64) {
// We need a frame pointer because this will get lowered to a PUSH/POP
// sequence.
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setHasOpaqueSPAdjustment(true);
// Don't do anything here, we will expand these intrinsics out later
// during ExpandISelPseudos in EmitInstrWithCustomInserter.
return SDValue();
}
return SDValue();
}
@@ -21144,6 +21155,47 @@ static MachineBasicBlock *EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB,
return BB;
}
static MachineBasicBlock *EmitWRPKRU(MachineInstr *MI, MachineBasicBlock *BB,
const X86Subtarget *Subtarget) {
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
// insert input VAL into EAX
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
.addReg(MI->getOperand(0).getReg());
// insert zero to ECX
BuildMI(*BB, MI, dl, TII->get(X86::XOR32rr), X86::ECX)
.addReg(X86::ECX)
.addReg(X86::ECX);
// insert zero to EDX
BuildMI(*BB, MI, dl, TII->get(X86::XOR32rr), X86::EDX)
.addReg(X86::EDX)
.addReg(X86::EDX);
// insert WRPKRU instruction
BuildMI(*BB, MI, dl, TII->get(X86::WRPKRUr));
MI->eraseFromParent(); // The pseudo is gone now.
return BB;
}
static MachineBasicBlock *EmitRDPKRU(MachineInstr *MI, MachineBasicBlock *BB,
const X86Subtarget *Subtarget) {
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
// insert zero to ECX
BuildMI(*BB, MI, dl, TII->get(X86::XOR32rr), X86::ECX)
.addReg(X86::ECX)
.addReg(X86::ECX);
// insert RDPKRU instruction
BuildMI(*BB, MI, dl, TII->get(X86::RDPKRUr));
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
.addReg(X86::EAX);
MI->eraseFromParent(); // The pseudo is gone now.
return BB;
}
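User code reaches these pseudo expansions through the PKU intrinsics; a small usage sketch (assumes a target built with PKU support; the zeroed ECX/EDX built above are the fixed operands RDPKRU/WRPKRU require):

#include <immintrin.h>
// Deny all access for protection key 1: read PKRU, set its AD/WD
// bits, and write it back. _rdpkru_u32 and _wrpkru lower to the
// RDPKRU/WRPKRU pseudos expanded above.
unsigned denyKey1(void) {
  unsigned PKRU = _rdpkru_u32();
  PKRU |= 0x3u << (2 * 1);  // AD|WD bits for key 1
  _wrpkru(PKRU);
  return PKRU;
}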
static MachineBasicBlock *EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB,
const X86Subtarget *Subtarget) {
DebugLoc dl = MI->getDebugLoc();
@@ -22495,6 +22547,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::CMOV_V64I1:
return EmitLoweredSelect(MI, BB);
case X86::RDFLAGS32:
case X86::RDFLAGS64: {
DebugLoc DL = MI->getDebugLoc();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
unsigned PushF =
MI->getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64;
unsigned Pop =
MI->getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r;
BuildMI(*BB, MI, DL, TII->get(PushF));
BuildMI(*BB, MI, DL, TII->get(Pop), MI->getOperand(0).getReg());
MI->eraseFromParent(); // The pseudo is gone now.
return BB;
}
case X86::WRFLAGS32:
case X86::WRFLAGS64: {
DebugLoc DL = MI->getDebugLoc();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
unsigned Push =
MI->getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r;
unsigned PopF =
MI->getOpcode() == X86::WRFLAGS32 ? X86::POPF32 : X86::POPF64;
BuildMI(*BB, MI, DL, TII->get(Push)).addReg(MI->getOperand(0).getReg());
BuildMI(*BB, MI, DL, TII->get(PopF));
MI->eraseFromParent(); // The pseudo is gone now.
return BB;
}
case X86::RELEASE_FADD32mr:
case X86::RELEASE_FADD64mr:
return EmitLoweredAtomicFP(MI, BB);
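The RDFLAGS/WRFLAGS cases above implement the llvm.x86.flags.read/write intrinsics; a hedged usage sketch via the compiler builtins (assuming a 64-bit target where clang exposes __readeflags/__writeeflags):

#include <x86intrin.h>
// Flip the direction flag (EFLAGS bit 10) and restore it. The
// builtins lower to the RDFLAGS/WRFLAGS pseudos, which the custom
// inserter above turns into PUSHF+POP and PUSH+POPF sequences.
void toggleAndRestoreDF(void) {
  unsigned long long Flags = __readeflags();
  __writeeflags(Flags ^ (1ull << 10));  // toggle DF
  __writeeflags(Flags);                 // restore the saved EFLAGS
}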
@@ -22611,7 +22693,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Thread synchronization.
case X86::MONITOR:
return EmitMonitor(MI, BB, Subtarget);
// PKU feature
case X86::WRPKRU:
return EmitWRPKRU(MI, BB, Subtarget);
case X86::RDPKRU:
return EmitRDPKRU(MI, BB, Subtarget);
// xbegin
case X86::XBEGIN:
return EmitXBegin(MI, BB, Subtarget->getInstrInfo());
@@ -23480,6 +23566,31 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
}
return SDValue();
}
case X86ISD::BLENDI: {
SDValue V0 = N->getOperand(0);
SDValue V1 = N->getOperand(1);
assert(VT == V0.getSimpleValueType() && VT == V1.getSimpleValueType() &&
"Unexpected input vector types");
// Canonicalize a v2f64 blend with a mask of 2 by swapping the vector
// operands and changing the mask to 1. This saves us a bunch of
// pattern-matching possibilities related to scalar math ops in SSE/AVX.
// x86InstrInfo knows how to commute this back after instruction selection
// if it would help register allocation.
// TODO: If optimizing for size or a processor that doesn't suffer from
// partial register update stalls, this should be transformed into a MOVSD
// instruction because a MOVSD is 1-2 bytes smaller than a BLENDPD.
if (VT == MVT::v2f64)
if (auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(2)))
if (Mask->getZExtValue() == 2 && !isShuffleFoldableLoad(V0)) {
SDValue NewMask = DAG.getConstant(1, DL, MVT::i8);
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask);
}
return SDValue();
}
default:
return SDValue();
}
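The BLENDI swap above is value-preserving because bit i of the immediate merely selects which input supplies lane i; a scalar model checking the v2f64 case (not SelectionDAG code):

#include <cassert>
// Model of v2f64 BLENDI: mask bit i picks B's lane i, else A's.
// blend(V0, V1, 0b10) == blend(V1, V0, 0b01), the commuted form
// produced by the combine above.
struct V2 { double E0, E1; };
static V2 blend(V2 A, V2 B, unsigned Mask) {
  return { (Mask & 1) ? B.E0 : A.E0, (Mask & 2) ? B.E1 : A.E1 };
}
int main() {
  V2 V0{1.0, 2.0}, V1{3.0, 4.0};
  V2 X = blend(V0, V1, 2), Y = blend(V1, V0, 1);
  assert(X.E0 == Y.E0 && X.E1 == Y.E1);
  return 0;
}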
@@ -23573,9 +23684,13 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
/// the operands which explicitly discard the lanes which are unused by this
/// operation to try to flow through the rest of the combiner the fact that
/// they're unused.
static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
(!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
return SDValue();
// We only handle target-independent shuffles.
// FIXME: It would be easy and harmless to use the target shuffle mask
@@ -23617,12 +23732,6 @@ static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15})))
return SDValue();
// Only specific types are legal at this point, assert so we notice if and
// when these change.
assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v8f32 ||
VT == MVT::v4f64) &&
"Unknown vector type encountered!");
return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS);
}
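The ADDSUB node built here subtracts in even lanes and adds in odd lanes, which is what the {0,9,2,11,...} blends of FSUB/FADD tested above select; a scalar model (illustrative only):

// Scalar model of X86ISD::ADDSUB on a 4-wide vector: even lanes
// compute L[i] - R[i], odd lanes compute L[i] + R[i].
static void addsub4(const float L[4], const float R[4], float Out[4]) {
  for (int i = 0; i < 4; ++i)
    Out[i] = (i & 1) ? L[i] + R[i] : L[i] - R[i];
}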
@@ -23642,8 +23751,8 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
// If we have legalized the vector types, look for blends of FADD and FSUB
// nodes that we can fuse into an ADDSUB node.
if (TLI.isTypeLegal(VT) && Subtarget->hasSSE3())
if (SDValue AddSub = combineShuffleToAddSub(N, DAG))
if (TLI.isTypeLegal(VT))
if (SDValue AddSub = combineShuffleToAddSub(N, Subtarget, DAG))
return AddSub;
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
@@ -27310,7 +27419,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
// from AH (which we otherwise need to do contortions to access).
if (N0.getOpcode() == ISD::UDIVREM &&
N0.getResNo() == 1 && N0.getValueType() == MVT::i8 &&
(VT == MVT::i32 || VT == MVT::i64)) {
VT == MVT::i32) {
SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
SDValue R = DAG.getNode(X86ISD::UDIVREM8_ZEXT_HREG, dl, NodeTys,
N0.getOperand(0), N0.getOperand(1));
@@ -27382,32 +27491,6 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) {
SDValue V0 = N->getOperand(0);
SDValue V1 = N->getOperand(1);
SDLoc DL(N);
EVT VT = N->getValueType(0);
// Canonicalize a v2f64 blend with a mask of 2 by swapping the vector
// operands and changing the mask to 1. This saves us a bunch of
// pattern-matching possibilities related to scalar math ops in SSE/AVX.
// x86InstrInfo knows how to commute this back after instruction selection
// if it would help register allocation.
// TODO: If optimizing for size or a processor that doesn't suffer from
// partial register update stalls, this should be transformed into a MOVSD
// instruction because a MOVSD is 1-2 bytes smaller than a BLENDPD.
if (VT == MVT::v2f64)
if (auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(2)))
if (Mask->getZExtValue() == 2 && !isShuffleFoldableLoad(V0)) {
SDValue NewMask = DAG.getConstant(1, DL, MVT::i8);
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask);
}
return SDValue();
}
static SDValue PerformGatherScatterCombine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
// Gather and Scatter instructions use k-registers for masks. The type of
@@ -27840,6 +27923,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FANDN: return PerformFANDNCombine(N, DAG, Subtarget);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
// TODO: refactor the [SU]DIVREM8_[SZ]EXT_HREG code so that it's not duplicated.
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
@@ -27851,6 +27935,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::PALIGNR:
case X86ISD::BLENDI:
case X86ISD::UNPCKH:
case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
@@ -27865,7 +27950,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
case X86ISD::BLENDI: return PerformBLENDICombine(N, DAG);
case ISD::MGATHER:
case ISD::MSCATTER: return PerformGatherScatterCombine(N, DAG);
}
@@ -27902,6 +27986,18 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
}
}
/// This function checks if any of the users of EFLAGS copies the EFLAGS. We
/// know that the code that lowers COPY of EFLAGS has to use the stack, and if
/// we don't adjust the stack we clobber the first frame index.
/// See X86InstrInfo::copyPhysReg.
bool X86TargetLowering::hasCopyImplyingStackAdjustment(
MachineFunction *MF) const {
const MachineRegisterInfo &MRI = MF->getRegInfo();
return any_of(MRI.reg_instructions(X86::EFLAGS),
[](const MachineInstr &RI) { return RI.isCopy(); });
}
/// IsDesirableToPromoteOp - This method query the target whether it is
/// beneficial for dag combiner to promote the specified node. If true, it
/// should return the desired promotion type by reference.


@@ -697,6 +697,10 @@ namespace llvm {
/// and some i16 instructions are slow.
bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
/// Return true if the MachineFunction contains a COPY which would imply
/// HasOpaqueSPAdjustment.
bool hasCopyImplyingStackAdjustment(MachineFunction *MF) const override;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const override;


@@ -2366,6 +2366,7 @@ def : Pat<(xor (xor VK1:$src1, VK1:$src2), (i1 1)),
multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
RegisterClass KRCSrc, Predicate prd> {
let Predicates = [prd] in {
let hasSideEffects = 0 in
def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
(ins KRC:$src1, KRC:$src2),
"kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,

Some files were not shown because too many files have changed in this diff.