Upgrade our copies of clang, llvm, lld, lldb, compiler-rt and libc++ to
6.0.1 release (upstream r335540). Relnotes: yes MFC after: 2 weeks
This commit is contained in:
commit
6ccc06f6cb
@ -159,7 +159,6 @@ typedef struct user_fpregs elf_fpregset_t;
|
||||
# include <sys/procfs.h>
|
||||
#endif
|
||||
#include <sys/user.h>
|
||||
#include <sys/ustat.h>
|
||||
#include <linux/cyclades.h>
|
||||
#include <linux/if_eql.h>
|
||||
#include <linux/if_plip.h>
|
||||
@ -253,7 +252,19 @@ namespace __sanitizer {
|
||||
#endif // SANITIZER_LINUX || SANITIZER_FREEBSD
|
||||
|
||||
#if SANITIZER_LINUX && !SANITIZER_ANDROID
|
||||
unsigned struct_ustat_sz = sizeof(struct ustat);
|
||||
// Use pre-computed size of struct ustat to avoid <sys/ustat.h> which
|
||||
// has been removed from glibc 2.28.
|
||||
#if defined(__aarch64__) || defined(__s390x__) || defined (__mips64) \
|
||||
|| defined(__powerpc64__) || defined(__arch64__) || defined(__sparcv9) \
|
||||
|| defined(__x86_64__)
|
||||
#define SIZEOF_STRUCT_USTAT 32
|
||||
#elif defined(__arm__) || defined(__i386__) || defined(__mips__) \
|
||||
|| defined(__powerpc__) || defined(__s390__)
|
||||
#define SIZEOF_STRUCT_USTAT 20
|
||||
#else
|
||||
#error Unknown size of struct ustat
|
||||
#endif
|
||||
unsigned struct_ustat_sz = SIZEOF_STRUCT_USTAT;
|
||||
unsigned struct_rlimit64_sz = sizeof(struct rlimit64);
|
||||
unsigned struct_statvfs64_sz = sizeof(struct statvfs64);
|
||||
#endif // SANITIZER_LINUX && !SANITIZER_ANDROID
|
||||
|
@ -2058,15 +2058,15 @@ list<_Tp, _Alloc>::splice(const_iterator __p, list& __c, const_iterator __f, con
|
||||
#endif
|
||||
if (__f != __l)
|
||||
{
|
||||
if (this != &__c)
|
||||
{
|
||||
size_type __s = _VSTD::distance(__f, __l);
|
||||
__c.__sz() -= __s;
|
||||
base::__sz() += __s;
|
||||
}
|
||||
__link_pointer __first = __f.__ptr_;
|
||||
--__l;
|
||||
__link_pointer __last = __l.__ptr_;
|
||||
if (this != &__c)
|
||||
{
|
||||
size_type __s = _VSTD::distance(__f, __l) + 1;
|
||||
__c.__sz() -= __s;
|
||||
base::__sz() += __s;
|
||||
}
|
||||
base::__unlink_nodes(__first, __last);
|
||||
__link_nodes(__p.__ptr_, __first, __last);
|
||||
#if _LIBCPP_DEBUG_LEVEL >= 2
|
||||
|
@ -18,7 +18,7 @@ bool uncaught_exception() _NOEXCEPT { return uncaught_exceptions() > 0; }
|
||||
|
||||
int uncaught_exceptions() _NOEXCEPT
|
||||
{
|
||||
# if _LIBCPPABI_VERSION > 1101
|
||||
# if _LIBCPPABI_VERSION > 1001
|
||||
return __cxa_uncaught_exceptions();
|
||||
# else
|
||||
return __cxa_uncaught_exception() ? 1 : 0;
|
||||
|
@ -421,7 +421,8 @@ public:
|
||||
/// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI
|
||||
/// and \p DefIdx.
|
||||
/// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of
|
||||
/// the list is modeled as <Reg:SubReg, SubIdx>.
|
||||
/// the list is modeled as <Reg:SubReg, SubIdx>. Operands with the undef
|
||||
/// flag are not added to this list.
|
||||
/// E.g., REG_SEQUENCE %1:sub1, sub0, %2, sub1 would produce
|
||||
/// two elements:
|
||||
/// - %1:sub1, sub0
|
||||
@ -446,7 +447,8 @@ public:
|
||||
/// - %1:sub1, sub0
|
||||
///
|
||||
/// \returns true if it is possible to build such an input sequence
|
||||
/// with the pair \p MI, \p DefIdx. False otherwise.
|
||||
/// with the pair \p MI, \p DefIdx and the operand has no undef flag set.
|
||||
/// False otherwise.
|
||||
///
|
||||
/// \pre MI.isExtractSubreg() or MI.isExtractSubregLike().
|
||||
///
|
||||
@ -465,7 +467,8 @@ public:
|
||||
/// - InsertedReg: %1:sub1, sub3
|
||||
///
|
||||
/// \returns true if it is possible to build such an input sequence
|
||||
/// with the pair \p MI, \p DefIdx. False otherwise.
|
||||
/// with the pair \p MI, \p DefIdx and the operand has no undef flag set.
|
||||
/// False otherwise.
|
||||
///
|
||||
/// \pre MI.isInsertSubreg() or MI.isInsertSubregLike().
|
||||
///
|
||||
|
@ -36,8 +36,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
|
||||
|
||||
// Intrinsics used to generate ctr-based loops. These should only be
|
||||
// generated by the PowerPC backend!
|
||||
// The branch intrinsic is marked as NoDuplicate because loop rotation will
|
||||
// attempt to duplicate it forming loops where a block reachable from one
|
||||
// instance of it can contain another.
|
||||
def int_ppc_mtctr : Intrinsic<[], [llvm_anyint_ty], []>;
|
||||
def int_ppc_is_decremented_ctr_nonzero : Intrinsic<[llvm_i1_ty], [], []>;
|
||||
def int_ppc_is_decremented_ctr_nonzero :
|
||||
Intrinsic<[llvm_i1_ty], [], [IntrNoDuplicate]>;
|
||||
|
||||
// Intrinsics for [double]word extended forms of divide instructions
|
||||
def int_ppc_divwe : GCCBuiltin<"__builtin_divwe">,
|
||||
|
@ -502,6 +502,8 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
|
||||
}
|
||||
|
||||
FunctionInfo &FI = FunctionInfos[F];
|
||||
Handles.emplace_front(*this, F);
|
||||
Handles.front().I = Handles.begin();
|
||||
bool KnowNothing = false;
|
||||
|
||||
// Collect the mod/ref properties due to called functions. We only compute
|
||||
|
@ -153,9 +153,14 @@ public:
|
||||
if (IsCall != Other.IsCall)
|
||||
return false;
|
||||
|
||||
if (IsCall)
|
||||
return CS.getCalledValue() == Other.CS.getCalledValue();
|
||||
return Loc == Other.Loc;
|
||||
if (!IsCall)
|
||||
return Loc == Other.Loc;
|
||||
|
||||
if (CS.getCalledValue() != Other.CS.getCalledValue())
|
||||
return false;
|
||||
|
||||
return CS.arg_size() == Other.CS.arg_size() &&
|
||||
std::equal(CS.arg_begin(), CS.arg_end(), Other.CS.arg_begin());
|
||||
}
|
||||
|
||||
private:
|
||||
@ -179,12 +184,18 @@ template <> struct DenseMapInfo<MemoryLocOrCall> {
|
||||
}
|
||||
|
||||
static unsigned getHashValue(const MemoryLocOrCall &MLOC) {
|
||||
if (MLOC.IsCall)
|
||||
return hash_combine(MLOC.IsCall,
|
||||
DenseMapInfo<const Value *>::getHashValue(
|
||||
MLOC.getCS().getCalledValue()));
|
||||
return hash_combine(
|
||||
MLOC.IsCall, DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc()));
|
||||
if (!MLOC.IsCall)
|
||||
return hash_combine(
|
||||
MLOC.IsCall,
|
||||
DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc()));
|
||||
|
||||
hash_code hash =
|
||||
hash_combine(MLOC.IsCall, DenseMapInfo<const Value *>::getHashValue(
|
||||
MLOC.getCS().getCalledValue()));
|
||||
|
||||
for (const Value *Arg : MLOC.getCS().args())
|
||||
hash = hash_combine(hash, DenseMapInfo<const Value *>::getHashValue(Arg));
|
||||
return hash;
|
||||
}
|
||||
|
||||
static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) {
|
||||
|
@ -1714,20 +1714,25 @@ bool IfConverter::IfConvertDiamondCommon(
|
||||
}
|
||||
|
||||
// Remove the duplicated instructions at the beginnings of both paths.
|
||||
// Skip dbg_value instructions
|
||||
// Skip dbg_value instructions.
|
||||
MachineBasicBlock::iterator DI1 = MBB1.getFirstNonDebugInstr();
|
||||
MachineBasicBlock::iterator DI2 = MBB2.getFirstNonDebugInstr();
|
||||
BBI1->NonPredSize -= NumDups1;
|
||||
BBI2->NonPredSize -= NumDups1;
|
||||
|
||||
// Skip past the dups on each side separately since there may be
|
||||
// differing dbg_value entries.
|
||||
// differing dbg_value entries. NumDups1 can include a "return"
|
||||
// instruction, if it's not marked as "branch".
|
||||
for (unsigned i = 0; i < NumDups1; ++DI1) {
|
||||
if (DI1 == MBB1.end())
|
||||
break;
|
||||
if (!DI1->isDebugValue())
|
||||
++i;
|
||||
}
|
||||
while (NumDups1 != 0) {
|
||||
++DI2;
|
||||
if (DI2 == MBB2.end())
|
||||
break;
|
||||
if (!DI2->isDebugValue())
|
||||
--NumDups1;
|
||||
}
|
||||
@ -1738,11 +1743,16 @@ bool IfConverter::IfConvertDiamondCommon(
|
||||
Redefs.stepForward(MI, Dummy);
|
||||
}
|
||||
}
|
||||
|
||||
BBI.BB->splice(BBI.BB->end(), &MBB1, MBB1.begin(), DI1);
|
||||
MBB2.erase(MBB2.begin(), DI2);
|
||||
|
||||
// The branches have been checked to match, so it is safe to remove the branch
|
||||
// in BB1 and rely on the copy in BB2
|
||||
// The branches have been checked to match, so it is safe to remove the
|
||||
// branch in BB1 and rely on the copy in BB2. The complication is that
|
||||
// the blocks may end with a return instruction, which may or may not
|
||||
// be marked as "branch". If it's not, then it could be included in
|
||||
// "dups1", leaving the blocks potentially empty after moving the common
|
||||
// duplicates.
|
||||
#ifndef NDEBUG
|
||||
// Unanalyzable branches must match exactly. Check that now.
|
||||
if (!BBI1->IsBrAnalyzable)
|
||||
@ -1768,11 +1778,14 @@ bool IfConverter::IfConvertDiamondCommon(
|
||||
if (RemoveBranch)
|
||||
BBI2->NonPredSize -= TII->removeBranch(*BBI2->BB);
|
||||
else {
|
||||
do {
|
||||
assert(DI2 != MBB2.begin());
|
||||
DI2--;
|
||||
} while (DI2->isBranch() || DI2->isDebugValue());
|
||||
DI2++;
|
||||
// Make DI2 point to the end of the range where the common "tail"
|
||||
// instructions could be found.
|
||||
while (DI2 != MBB2.begin()) {
|
||||
MachineBasicBlock::iterator Prev = std::prev(DI2);
|
||||
if (!Prev->isBranch() && !Prev->isDebugValue())
|
||||
break;
|
||||
DI2 = Prev;
|
||||
}
|
||||
}
|
||||
while (NumDups2 != 0) {
|
||||
// NumDups2 only counted non-dbg_value instructions, so this won't
|
||||
@ -1833,11 +1846,15 @@ bool IfConverter::IfConvertDiamondCommon(
|
||||
// a non-predicated in BBI2, then we don't want to predicate the one from
|
||||
// BBI2. The reason is that if we merged these blocks, we would end up with
|
||||
// two predicated terminators in the same block.
|
||||
// Also, if the branches in MBB1 and MBB2 were non-analyzable, then don't
|
||||
// predicate them either. They were checked to be identical, and so the
|
||||
// same branch would happen regardless of which path was taken.
|
||||
if (!MBB2.empty() && (DI2 == MBB2.end())) {
|
||||
MachineBasicBlock::iterator BBI1T = MBB1.getFirstTerminator();
|
||||
MachineBasicBlock::iterator BBI2T = MBB2.getFirstTerminator();
|
||||
if (BBI1T != MBB1.end() && TII->isPredicated(*BBI1T) &&
|
||||
BBI2T != MBB2.end() && !TII->isPredicated(*BBI2T))
|
||||
bool BB1Predicated = BBI1T != MBB1.end() && TII->isPredicated(*BBI1T);
|
||||
bool BB2NonPredicated = BBI2T != MBB2.end() && !TII->isPredicated(*BBI2T);
|
||||
if (BB2NonPredicated && (BB1Predicated || !BBI2->IsBrAnalyzable))
|
||||
--DI2;
|
||||
}
|
||||
|
||||
|
@ -514,6 +514,39 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Detect invalid DBG_VALUE instructions, with a debug-use of a virtual
|
||||
// register that hasn't been defined yet. If we do not remove those here, then
|
||||
// the re-insertion of the DBG_VALUE instruction after register allocation
|
||||
// will be incorrect.
|
||||
// TODO: If earlier passes are corrected to generate sane debug information
|
||||
// (and if the machine verifier is improved to catch this), then these checks
|
||||
// could be removed or replaced by asserts.
|
||||
bool Discard = false;
|
||||
if (MI.getOperand(0).isReg() &&
|
||||
TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) {
|
||||
const unsigned Reg = MI.getOperand(0).getReg();
|
||||
if (!LIS->hasInterval(Reg)) {
|
||||
// The DBG_VALUE is described by a virtual register that does not have a
|
||||
// live interval. Discard the DBG_VALUE.
|
||||
Discard = true;
|
||||
DEBUG(dbgs() << "Discarding debug info (no LIS interval): "
|
||||
<< Idx << " " << MI);
|
||||
} else {
|
||||
// The DBG_VALUE is only valid if either Reg is live out from Idx, or Reg
|
||||
// is defined dead at Idx (where Idx is the slot index for the instruction
|
||||
// preceeding the DBG_VALUE).
|
||||
const LiveInterval &LI = LIS->getInterval(Reg);
|
||||
LiveQueryResult LRQ = LI.Query(Idx);
|
||||
if (!LRQ.valueOutOrDead()) {
|
||||
// We have found a DBG_VALUE with the value in a virtual register that
|
||||
// is not live. Discard the DBG_VALUE.
|
||||
Discard = true;
|
||||
DEBUG(dbgs() << "Discarding debug info (reg not live): "
|
||||
<< Idx << " " << MI);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get or create the UserValue for (variable,offset) here.
|
||||
bool IsIndirect = MI.getOperand(1).isImm();
|
||||
if (IsIndirect)
|
||||
@ -522,7 +555,13 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
|
||||
const DIExpression *Expr = MI.getDebugExpression();
|
||||
UserValue *UV =
|
||||
getUserValue(Var, Expr, MI.getDebugLoc());
|
||||
UV->addDef(Idx, MI.getOperand(0), IsIndirect);
|
||||
if (!Discard)
|
||||
UV->addDef(Idx, MI.getOperand(0), IsIndirect);
|
||||
else {
|
||||
MachineOperand MO = MachineOperand::CreateReg(0U, false);
|
||||
MO.setIsDebug();
|
||||
UV->addDef(Idx, MO, false);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -513,6 +513,11 @@ public:
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &F) override;
|
||||
|
||||
bool allowTailDupPlacement() const {
|
||||
assert(F);
|
||||
return TailDupPlacement && !F->getTarget().requiresStructuredCFG();
|
||||
}
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<MachineBranchProbabilityInfo>();
|
||||
AU.addRequired<MachineBlockFrequencyInfo>();
|
||||
@ -1018,7 +1023,7 @@ MachineBlockPlacement::getBestTrellisSuccessor(
|
||||
MachineBasicBlock *Succ1 = BestA.Dest;
|
||||
MachineBasicBlock *Succ2 = BestB.Dest;
|
||||
// Check to see if tail-duplication would be profitable.
|
||||
if (TailDupPlacement && shouldTailDuplicate(Succ2) &&
|
||||
if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) &&
|
||||
canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
|
||||
isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1),
|
||||
Chain, BlockFilter)) {
|
||||
@ -1044,7 +1049,7 @@ MachineBlockPlacement::getBestTrellisSuccessor(
|
||||
return Result;
|
||||
}
|
||||
|
||||
/// When the option TailDupPlacement is on, this method checks if the
|
||||
/// When the option allowTailDupPlacement() is on, this method checks if the
|
||||
/// fallthrough candidate block \p Succ (of block \p BB) can be tail-duplicated
|
||||
/// into all of its unplaced, unfiltered predecessors, that are not BB.
|
||||
bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
|
||||
@ -1493,7 +1498,7 @@ MachineBlockPlacement::selectBestSuccessor(
|
||||
if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
|
||||
Chain, BlockFilter)) {
|
||||
// If tail duplication would make Succ profitable, place it.
|
||||
if (TailDupPlacement && shouldTailDuplicate(Succ))
|
||||
if (allowTailDupPlacement() && shouldTailDuplicate(Succ))
|
||||
DupCandidates.push_back(std::make_tuple(SuccProb, Succ));
|
||||
continue;
|
||||
}
|
||||
@ -1702,7 +1707,7 @@ void MachineBlockPlacement::buildChain(
|
||||
auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
|
||||
MachineBasicBlock* BestSucc = Result.BB;
|
||||
bool ShouldTailDup = Result.ShouldTailDup;
|
||||
if (TailDupPlacement)
|
||||
if (allowTailDupPlacement())
|
||||
ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc));
|
||||
|
||||
// If an immediate successor isn't available, look for the best viable
|
||||
@ -1724,7 +1729,7 @@ void MachineBlockPlacement::buildChain(
|
||||
|
||||
// Placement may have changed tail duplication opportunities.
|
||||
// Check for that now.
|
||||
if (TailDupPlacement && BestSucc && ShouldTailDup) {
|
||||
if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
|
||||
// If the chosen successor was duplicated into all its predecessors,
|
||||
// don't bother laying it out, just go round the loop again with BB as
|
||||
// the chain end.
|
||||
@ -2758,7 +2763,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
|
||||
TailDupSize = TailDupPlacementAggressiveThreshold;
|
||||
}
|
||||
|
||||
if (TailDupPlacement) {
|
||||
if (allowTailDupPlacement()) {
|
||||
MPDT = &getAnalysis<MachinePostDominatorTree>();
|
||||
if (MF.getFunction().optForSize())
|
||||
TailDupSize = 1;
|
||||
|
@ -1812,6 +1812,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
|
||||
return ValueTrackerResult();
|
||||
// Otherwise, we want the whole source.
|
||||
const MachineOperand &Src = Def->getOperand(1);
|
||||
if (Src.isUndef())
|
||||
return ValueTrackerResult();
|
||||
return ValueTrackerResult(Src.getReg(), Src.getSubReg());
|
||||
}
|
||||
|
||||
@ -1855,6 +1857,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
|
||||
}
|
||||
|
||||
const MachineOperand &Src = Def->getOperand(SrcIdx);
|
||||
if (Src.isUndef())
|
||||
return ValueTrackerResult();
|
||||
return ValueTrackerResult(Src.getReg(), Src.getSubReg());
|
||||
}
|
||||
|
||||
@ -2023,6 +2027,10 @@ ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
|
||||
for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
|
||||
const MachineOperand &MO = Def->getOperand(i);
|
||||
assert(MO.isReg() && "Invalid PHI instruction");
|
||||
// We have no code to deal with undef operands. They shouldn't happen in
|
||||
// normal programs anyway.
|
||||
if (MO.isUndef())
|
||||
return ValueTrackerResult();
|
||||
Res.addSource(MO.getReg(), MO.getSubReg());
|
||||
}
|
||||
|
||||
@ -2079,9 +2087,14 @@ ValueTrackerResult ValueTracker::getNextSource() {
|
||||
// If we can still move up in the use-def chain, move to the next
|
||||
// definition.
|
||||
if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) {
|
||||
Def = MRI.getVRegDef(Reg);
|
||||
DefIdx = MRI.def_begin(Reg).getOperandNo();
|
||||
DefSubReg = Res.getSrcSubReg(0);
|
||||
MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg);
|
||||
if (DI != MRI.def_end()) {
|
||||
Def = DI->getParent();
|
||||
DefIdx = DI.getOperandNo();
|
||||
DefSubReg = Res.getSrcSubReg(0);
|
||||
} else {
|
||||
Def = nullptr;
|
||||
}
|
||||
return Res;
|
||||
}
|
||||
}
|
||||
|
@ -1151,6 +1151,8 @@ bool TargetInstrInfo::getRegSequenceInputs(
|
||||
for (unsigned OpIdx = 1, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx;
|
||||
OpIdx += 2) {
|
||||
const MachineOperand &MOReg = MI.getOperand(OpIdx);
|
||||
if (MOReg.isUndef())
|
||||
continue;
|
||||
const MachineOperand &MOSubIdx = MI.getOperand(OpIdx + 1);
|
||||
assert(MOSubIdx.isImm() &&
|
||||
"One of the subindex of the reg_sequence is not an immediate");
|
||||
@ -1174,6 +1176,8 @@ bool TargetInstrInfo::getExtractSubregInputs(
|
||||
// Def = EXTRACT_SUBREG v0.sub1, sub0.
|
||||
assert(DefIdx == 0 && "EXTRACT_SUBREG only has one def");
|
||||
const MachineOperand &MOReg = MI.getOperand(1);
|
||||
if (MOReg.isUndef())
|
||||
return false;
|
||||
const MachineOperand &MOSubIdx = MI.getOperand(2);
|
||||
assert(MOSubIdx.isImm() &&
|
||||
"The subindex of the extract_subreg is not an immediate");
|
||||
@ -1198,6 +1202,8 @@ bool TargetInstrInfo::getInsertSubregInputs(
|
||||
assert(DefIdx == 0 && "INSERT_SUBREG only has one def");
|
||||
const MachineOperand &MOBaseReg = MI.getOperand(1);
|
||||
const MachineOperand &MOInsertedReg = MI.getOperand(2);
|
||||
if (MOInsertedReg.isUndef())
|
||||
return false;
|
||||
const MachineOperand &MOSubIdx = MI.getOperand(3);
|
||||
assert(MOSubIdx.isImm() &&
|
||||
"One of the subindex of the reg_sequence is not an immediate");
|
||||
|
@ -1422,7 +1422,8 @@ RuntimeDyldELF::processRelocationRef(
|
||||
SectionEntry &Section = Sections[SectionID];
|
||||
uint8_t *Target = Section.getAddressWithOffset(Offset);
|
||||
bool RangeOverflow = false;
|
||||
if (!Value.SymbolName && SymType != SymbolRef::ST_Unknown) {
|
||||
bool IsExtern = Value.SymbolName || SymType == SymbolRef::ST_Unknown;
|
||||
if (!IsExtern) {
|
||||
if (AbiVariant != 2) {
|
||||
// In the ELFv1 ABI, a function call may point to the .opd entry,
|
||||
// so the final symbol value is calculated based on the relocation
|
||||
@ -1432,21 +1433,24 @@ RuntimeDyldELF::processRelocationRef(
|
||||
} else {
|
||||
// In the ELFv2 ABI, a function symbol may provide a local entry
|
||||
// point, which must be used for direct calls.
|
||||
uint8_t SymOther = Symbol->getOther();
|
||||
Value.Addend += ELF::decodePPC64LocalEntryOffset(SymOther);
|
||||
if (Value.SectionID == SectionID){
|
||||
uint8_t SymOther = Symbol->getOther();
|
||||
Value.Addend += ELF::decodePPC64LocalEntryOffset(SymOther);
|
||||
}
|
||||
}
|
||||
uint8_t *RelocTarget =
|
||||
Sections[Value.SectionID].getAddressWithOffset(Value.Addend);
|
||||
int64_t delta = static_cast<int64_t>(Target - RelocTarget);
|
||||
// If it is within 26-bits branch range, just set the branch target
|
||||
if (SignExtend64<26>(delta) == delta) {
|
||||
if (SignExtend64<26>(delta) != delta) {
|
||||
RangeOverflow = true;
|
||||
} else if ((AbiVariant != 2) ||
|
||||
(AbiVariant == 2 && Value.SectionID == SectionID)) {
|
||||
RelocationEntry RE(SectionID, Offset, RelType, Value.Addend);
|
||||
addRelocationForSection(RE, Value.SectionID);
|
||||
} else {
|
||||
RangeOverflow = true;
|
||||
}
|
||||
}
|
||||
if (Value.SymbolName || SymType == SymbolRef::ST_Unknown ||
|
||||
if (IsExtern || (AbiVariant == 2 && Value.SectionID != SectionID) ||
|
||||
RangeOverflow) {
|
||||
// It is an external symbol (either Value.SymbolName is set, or
|
||||
// SymType is SymbolRef::ST_Unknown) or out of range.
|
||||
@ -1503,10 +1507,10 @@ RuntimeDyldELF::processRelocationRef(
|
||||
RelType, 0);
|
||||
Section.advanceStubOffset(getMaxStubSize());
|
||||
}
|
||||
if (Value.SymbolName || SymType == SymbolRef::ST_Unknown) {
|
||||
if (IsExtern || (AbiVariant == 2 && Value.SectionID != SectionID)) {
|
||||
// Restore the TOC for external calls
|
||||
if (AbiVariant == 2)
|
||||
writeInt32BE(Target + 4, 0xE8410018); // ld r2,28(r1)
|
||||
writeInt32BE(Target + 4, 0xE8410018); // ld r2,24(r1)
|
||||
else
|
||||
writeInt32BE(Target + 4, 0xE8410028); // ld r2,40(r1)
|
||||
}
|
||||
|
@ -359,11 +359,9 @@ LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty) {
|
||||
return wrap(&unwrap(Ty)->getContext());
|
||||
}
|
||||
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
LLVM_DUMP_METHOD void LLVMDumpType(LLVMTypeRef Ty) {
|
||||
return unwrap(Ty)->dump();
|
||||
void LLVMDumpType(LLVMTypeRef Ty) {
|
||||
return unwrap(Ty)->print(errs(), /*IsForDebug=*/true);
|
||||
}
|
||||
#endif
|
||||
|
||||
char *LLVMPrintTypeToString(LLVMTypeRef Ty) {
|
||||
std::string buf;
|
||||
@ -658,7 +656,7 @@ void LLVMSetValueName(LLVMValueRef Val, const char *Name) {
|
||||
unwrap(Val)->setName(Name);
|
||||
}
|
||||
|
||||
LLVM_DUMP_METHOD void LLVMDumpValue(LLVMValueRef Val) {
|
||||
void LLVMDumpValue(LLVMValueRef Val) {
|
||||
unwrap(Val)->print(errs(), /*IsForDebug=*/true);
|
||||
}
|
||||
|
||||
|
@ -289,6 +289,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
|
||||
case Triple::mips64el:
|
||||
FDECFIEncoding = dwarf::DW_EH_PE_sdata8;
|
||||
break;
|
||||
case Triple::ppc64:
|
||||
case Triple::ppc64le:
|
||||
case Triple::x86_64:
|
||||
FDECFIEncoding = dwarf::DW_EH_PE_pcrel |
|
||||
(Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
|
||||
|
@ -1009,7 +1009,7 @@ StringRef sys::getHostCPUName() {
|
||||
#include "llvm/Support/X86TargetParser.def"
|
||||
|
||||
// Now check types.
|
||||
#define X86_CPU_SUBTYPE(ARCHNAME, ENUM) \
|
||||
#define X86_CPU_TYPE(ARCHNAME, ENUM) \
|
||||
if (Type == X86::ENUM) \
|
||||
return ARCHNAME;
|
||||
#include "llvm/Support/X86TargetParser.def"
|
||||
|
@ -299,6 +299,11 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
|
||||
printOffset(MO.getOffset(), O);
|
||||
break;
|
||||
}
|
||||
case MachineOperand::MO_BlockAddress: {
|
||||
MCSymbol *Sym = GetBlockAddressSymbol(MO.getBlockAddress());
|
||||
Sym->print(O, MAI);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -46,6 +46,7 @@
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/DebugCounter.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <cassert>
|
||||
#include <iterator>
|
||||
@ -60,6 +61,8 @@ STATISTIC(NumCollisionsAvoided,
|
||||
"Number of HW prefetch tag collisions avoided");
|
||||
STATISTIC(NumCollisionsNotAvoided,
|
||||
"Number of HW prefetch tag collisions not avoided due to lack of regsiters");
|
||||
DEBUG_COUNTER(FixCounter, "falkor-hwpf",
|
||||
"Controls which tag collisions are avoided");
|
||||
|
||||
namespace {
|
||||
|
||||
@ -729,6 +732,21 @@ void FalkorHWPFFix::runOnLoop(MachineLoop &L, MachineFunction &Fn) {
|
||||
bool Fixed = false;
|
||||
DEBUG(dbgs() << "Attempting to fix tag collision: " << MI);
|
||||
|
||||
if (!DebugCounter::shouldExecute(FixCounter)) {
|
||||
DEBUG(dbgs() << "Skipping fix due to debug counter:\n " << MI);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Add the non-base registers of MI as live so we don't use them as
|
||||
// scratch registers.
|
||||
for (unsigned OpI = 0, OpE = MI.getNumOperands(); OpI < OpE; ++OpI) {
|
||||
if (OpI == static_cast<unsigned>(LdI.BaseRegIdx))
|
||||
continue;
|
||||
MachineOperand &MO = MI.getOperand(OpI);
|
||||
if (MO.isReg() && MO.readsReg())
|
||||
LR.addReg(MO.getReg());
|
||||
}
|
||||
|
||||
for (unsigned ScratchReg : AArch64::GPR64RegClass) {
|
||||
if (!LR.available(ScratchReg) || MRI.isReserved(ScratchReg))
|
||||
continue;
|
||||
|
@ -917,6 +917,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
|
||||
int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;
|
||||
int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
|
||||
bool isFixed = MFI.isFixedObjectIndex(FI);
|
||||
bool isCSR = !isFixed && MFI.getObjectOffset(FI) >=
|
||||
-((int)AFI->getCalleeSavedStackSize());
|
||||
|
||||
// Use frame pointer to reference fixed objects. Use it for locals if
|
||||
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
|
||||
@ -930,6 +932,12 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
|
||||
// Argument access should always use the FP.
|
||||
if (isFixed) {
|
||||
UseFP = hasFP(MF);
|
||||
} else if (isCSR && RegInfo->needsStackRealignment(MF)) {
|
||||
// References to the CSR area must use FP if we're re-aligning the stack
|
||||
// since the dynamically-sized alignment padding is between the SP/BP and
|
||||
// the CSR area.
|
||||
assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
|
||||
UseFP = true;
|
||||
} else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
|
||||
!RegInfo->needsStackRealignment(MF)) {
|
||||
// Use SP or FP, whichever gives us the best chance of the offset
|
||||
@ -947,9 +955,9 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
|
||||
}
|
||||
}
|
||||
|
||||
assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
|
||||
assert(((isFixed || isCSR) || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
|
||||
"In the presence of dynamic stack pointer realignment, "
|
||||
"non-argument objects cannot be accessed through the frame pointer");
|
||||
"non-argument/CSR objects cannot be accessed through the frame pointer");
|
||||
|
||||
if (UseFP) {
|
||||
FrameReg = RegInfo->getFrameRegister(MF);
|
||||
|
@ -4930,7 +4930,8 @@ bool AArch64TargetLowering::isOffsetFoldingLegal(
|
||||
bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
|
||||
// We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
|
||||
// FIXME: We should be able to handle f128 as well with a clever lowering.
|
||||
if (Imm.isPosZero() && (VT == MVT::f16 || VT == MVT::f64 || VT == MVT::f32)) {
|
||||
if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 ||
|
||||
(VT == MVT::f16 && Subtarget->hasFullFP16()))) {
|
||||
DEBUG(dbgs() << "Legal fp imm: materialize 0 using the zero register\n");
|
||||
return true;
|
||||
}
|
||||
@ -5066,7 +5067,7 @@ SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
|
||||
|
||||
// Table of Constraints
|
||||
// TODO: This is the current set of constraints supported by ARM for the
|
||||
// compiler, not all of them may make sense, e.g. S may be difficult to support.
|
||||
// compiler, not all of them may make sense.
|
||||
//
|
||||
// r - A general register
|
||||
// w - An FP/SIMD register of some size in the range v0-v31
|
||||
@ -5126,6 +5127,8 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
// currently handle addresses it is the same as 'r'.
|
||||
case 'Q':
|
||||
return C_Memory;
|
||||
case 'S': // A symbolic address
|
||||
return C_Other;
|
||||
}
|
||||
}
|
||||
return TargetLowering::getConstraintType(Constraint);
|
||||
@ -5250,6 +5253,23 @@ void AArch64TargetLowering::LowerAsmOperandForConstraint(
|
||||
Result = DAG.getRegister(AArch64::WZR, MVT::i32);
|
||||
break;
|
||||
}
|
||||
case 'S': {
|
||||
// An absolute symbolic address or label reference.
|
||||
if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
|
||||
Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
|
||||
GA->getValueType(0));
|
||||
} else if (const BlockAddressSDNode *BA =
|
||||
dyn_cast<BlockAddressSDNode>(Op)) {
|
||||
Result =
|
||||
DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0));
|
||||
} else if (const ExternalSymbolSDNode *ES =
|
||||
dyn_cast<ExternalSymbolSDNode>(Op)) {
|
||||
Result =
|
||||
DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0));
|
||||
} else
|
||||
return;
|
||||
break;
|
||||
}
|
||||
|
||||
case 'I':
|
||||
case 'J':
|
||||
@ -9637,6 +9657,15 @@ static SDValue performPostLD1Combine(SDNode *N,
|
||||
if (LD->getOpcode() != ISD::LOAD)
|
||||
return SDValue();
|
||||
|
||||
// The vector lane must be a constant in the LD1LANE opcode.
|
||||
SDValue Lane;
|
||||
if (IsLaneOp) {
|
||||
Lane = N->getOperand(2);
|
||||
auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
|
||||
if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
|
||||
EVT MemVT = LoadSDN->getMemoryVT();
|
||||
// Check if memory operand is the same type as the vector element.
|
||||
@ -9693,7 +9722,7 @@ static SDValue performPostLD1Combine(SDNode *N,
|
||||
Ops.push_back(LD->getOperand(0)); // Chain
|
||||
if (IsLaneOp) {
|
||||
Ops.push_back(Vector); // The vector to be inserted
|
||||
Ops.push_back(N->getOperand(2)); // The lane to be inserted in the vector
|
||||
Ops.push_back(Lane); // The lane to be inserted in the vector
|
||||
}
|
||||
Ops.push_back(Addr);
|
||||
Ops.push_back(Inc);
|
||||
|
@ -2713,7 +2713,7 @@ defm FMOV : UnscaledConversion<"fmov">;
|
||||
// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
|
||||
let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
|
||||
def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
|
||||
Sched<[WriteF]>;
|
||||
Sched<[WriteF]>, Requires<[HasFullFP16]>;
|
||||
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
|
||||
Sched<[WriteF]>;
|
||||
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
|
||||
|
@ -147,6 +147,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
|
||||
initializeR600PacketizerPass(*PR);
|
||||
initializeR600ExpandSpecialInstrsPassPass(*PR);
|
||||
initializeR600VectorRegMergerPass(*PR);
|
||||
initializeGlobalISel(*PR);
|
||||
initializeAMDGPUDAGToDAGISelPass(*PR);
|
||||
initializeSILowerI1CopiesPass(*PR);
|
||||
initializeSIFixSGPRCopiesPass(*PR);
|
||||
|
@ -358,6 +358,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
|
||||
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Promote);
|
||||
setOperationAction(ISD::CTLZ, MVT::i16, Promote);
|
||||
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Promote);
|
||||
setOperationAction(ISD::CTPOP, MVT::i16, Promote);
|
||||
|
||||
setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);
|
||||
|
||||
|
@ -726,6 +726,10 @@ def : GCNPat <
|
||||
(i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
|
||||
(V_BCNT_U32_B32_e64 $popcnt, $val)
|
||||
>;
|
||||
def : GCNPat <
|
||||
(i16 (add (i16 (trunc (ctpop i32:$popcnt))), i16:$val)),
|
||||
(V_BCNT_U32_B32_e64 $popcnt, $val)
|
||||
>;
|
||||
|
||||
/********** ============================================ **********/
|
||||
/********** Extraction, Insertion, Building and Casting **********/
|
||||
|
@ -4864,12 +4864,14 @@ bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
|
||||
// Populate the InputRegs accordingly.
|
||||
// rY
|
||||
const MachineOperand *MOReg = &MI.getOperand(1);
|
||||
InputRegs.push_back(
|
||||
RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_0));
|
||||
if (!MOReg->isUndef())
|
||||
InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
|
||||
MOReg->getSubReg(), ARM::ssub_0));
|
||||
// rZ
|
||||
MOReg = &MI.getOperand(2);
|
||||
InputRegs.push_back(
|
||||
RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_1));
|
||||
if (!MOReg->isUndef())
|
||||
InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
|
||||
MOReg->getSubReg(), ARM::ssub_1));
|
||||
return true;
|
||||
}
|
||||
llvm_unreachable("Target dependent opcode missing");
|
||||
@ -4888,6 +4890,8 @@ bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
|
||||
// rX = EXTRACT_SUBREG dZ, ssub_0
|
||||
// rY = EXTRACT_SUBREG dZ, ssub_1
|
||||
const MachineOperand &MOReg = MI.getOperand(2);
|
||||
if (MOReg.isUndef())
|
||||
return false;
|
||||
InputReg.Reg = MOReg.getReg();
|
||||
InputReg.SubReg = MOReg.getSubReg();
|
||||
InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
|
||||
@ -4907,6 +4911,8 @@ bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
|
||||
// dX = VSETLNi32 dY, rZ, imm
|
||||
const MachineOperand &MOBaseReg = MI.getOperand(1);
|
||||
const MachineOperand &MOInsertedReg = MI.getOperand(2);
|
||||
if (MOInsertedReg.isUndef())
|
||||
return false;
|
||||
const MachineOperand &MOIndex = MI.getOperand(3);
|
||||
BaseReg.Reg = MOBaseReg.getReg();
|
||||
BaseReg.SubReg = MOBaseReg.getSubReg();
|
||||
|
@ -35,6 +35,7 @@ mayOptimizeThumb2Instruction(const MachineInstr *MI) {
|
||||
case ARM::tBcc:
|
||||
// optimizeThumb2JumpTables.
|
||||
case ARM::t2BR_JT:
|
||||
case ARM::tBR_JTr:
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -5136,6 +5136,7 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
|
||||
// It also applies for registers Rt and Rs of microMIPSr6 jalrc.hb instruction
|
||||
// and registers Rd and Base for microMIPS lwp instruction
|
||||
case Mips::JALR_HB:
|
||||
case Mips::JALR_HB64:
|
||||
case Mips::JALRC_HB_MMR6:
|
||||
case Mips::JALRC_MMR6:
|
||||
if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg())
|
||||
|
@ -225,6 +225,8 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
|
||||
switch (Kind) {
|
||||
case Mips::fixup_Mips_NONE:
|
||||
return ELF::R_MIPS_NONE;
|
||||
case FK_Data_1:
|
||||
report_fatal_error("MIPS does not support one byte relocations");
|
||||
case Mips::fixup_Mips_16:
|
||||
case FK_Data_2:
|
||||
return IsPCRel ? ELF::R_MIPS_PC16 : ELF::R_MIPS_16;
|
||||
|
@ -1886,6 +1886,12 @@ let AddedComplexity = 41 in {
|
||||
|
||||
def TAILCALL_MMR6 : TailCall<BC_MMR6, brtarget26_mm>, ISA_MICROMIPS32R6;
|
||||
|
||||
def TAILCALLREG_MMR6 : TailCallReg<JRC16_MM, GPR32Opnd>, ISA_MICROMIPS32R6;
|
||||
|
||||
def PseudoIndirectBranch_MMR6 : PseudoIndirectBranchBase<JRC16_MMR6,
|
||||
GPR32Opnd>,
|
||||
ISA_MICROMIPS32R6;
|
||||
|
||||
def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)),
|
||||
(TAILCALL_MMR6 tglobaladdr:$dst)>, ISA_MICROMIPS32R6;
|
||||
|
||||
|
@ -1003,6 +1003,12 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
|
||||
|
||||
def TAILCALL_MM : TailCall<J_MM, jmptarget_mm>, ISA_MIPS1_NOT_32R6_64R6;
|
||||
|
||||
def TAILCALLREG_MM : TailCallReg<JRC16_MM, GPR32Opnd>,
|
||||
ISA_MICROMIPS32_NOT_MIPS32R6;
|
||||
|
||||
def PseudoIndirectBranch_MM : PseudoIndirectBranchBase<JR_MM, GPR32Opnd>,
|
||||
ISA_MICROMIPS32_NOT_MIPS32R6;
|
||||
|
||||
let DecoderNamespace = "MicroMips" in {
|
||||
def RDHWR_MM : MMRel, R6MMR6Rel, ReadHardware<GPR32Opnd, HWRegsOpnd>,
|
||||
RDHWR_FM_MM, ISA_MICROMIPS32_NOT_MIPS32R6;
|
||||
|
@ -193,6 +193,10 @@ def FeatureMT : SubtargetFeature<"mt", "HasMT", "true", "Mips MT ASE">;
|
||||
def FeatureLongCalls : SubtargetFeature<"long-calls", "UseLongCalls", "true",
|
||||
"Disable use of the jal instruction">;
|
||||
|
||||
def FeatureUseIndirectJumpsHazard : SubtargetFeature<"use-indirect-jump-hazard",
|
||||
"UseIndirectJumpsHazard",
|
||||
"true", "Use indirect jump"
|
||||
" guards to prevent certain speculation based attacks">;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Mips processors supported.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -1036,3 +1036,42 @@ def : MipsPat<(select i32:$cond, immz, i32:$f),
|
||||
(SELEQZ i32:$f, i32:$cond)>,
|
||||
ISA_MIPS32R6;
|
||||
}
|
||||
|
||||
// Pseudo instructions
|
||||
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,
|
||||
hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT] in {
|
||||
class TailCallRegR6<Instruction JumpInst, Register RT, RegisterOperand RO> :
|
||||
PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
|
||||
PseudoInstExpansion<(JumpInst RT:$rt, RO:$rs)>;
|
||||
}
|
||||
|
||||
class PseudoIndirectBranchBaseR6<Instruction JumpInst, Register RT,
|
||||
RegisterOperand RO> :
|
||||
MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)],
|
||||
II_IndirectBranchPseudo>,
|
||||
PseudoInstExpansion<(JumpInst RT:$rt, RO:$rs)> {
|
||||
let isTerminator=1;
|
||||
let isBarrier=1;
|
||||
let hasDelaySlot = 1;
|
||||
let isBranch = 1;
|
||||
let isIndirectBranch = 1;
|
||||
bit isCTI = 1;
|
||||
}
|
||||
|
||||
|
||||
let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
|
||||
NoIndirectJumpGuards] in {
|
||||
def TAILCALLR6REG : TailCallRegR6<JALR, ZERO, GPR32Opnd>, ISA_MIPS32R6;
|
||||
def PseudoIndirectBranchR6 : PseudoIndirectBranchBaseR6<JALR, ZERO,
|
||||
GPR32Opnd>,
|
||||
ISA_MIPS32R6;
|
||||
}
|
||||
|
||||
let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
|
||||
UseIndirectJumpsHazard] in {
|
||||
def TAILCALLHBR6REG : TailCallReg<JR_HB_R6, GPR32Opnd>, ISA_MIPS32R6;
|
||||
def PseudoIndrectHazardBranchR6 : PseudoIndirectBranchBase<JR_HB_R6,
|
||||
GPR32Opnd>,
|
||||
ISA_MIPS32R6;
|
||||
}
|
||||
|
||||
|
@ -240,13 +240,32 @@ let isCodeGenOnly = 1 in {
|
||||
def BGTZ64 : CBranchZero<"bgtz", brtarget, setgt, GPR64Opnd>, BGEZ_FM<7, 0>;
|
||||
def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>;
|
||||
def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>;
|
||||
def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>;
|
||||
let AdditionalPredicates = [NoIndirectJumpGuards] in
|
||||
def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>;
|
||||
}
|
||||
let AdditionalPredicates = [NotInMicroMips],
|
||||
DecoderNamespace = "Mips64" in {
|
||||
def JR_HB64 : JR_HB_DESC<GPR64Opnd>, JR_HB_ENC, ISA_MIPS32_NOT_32R6_64R6;
|
||||
def JALR_HB64 : JALR_HB_DESC<GPR64Opnd>, JALR_HB_ENC, ISA_MIPS32R2;
|
||||
}
|
||||
def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>;
|
||||
|
||||
let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
|
||||
NoIndirectJumpGuards] in {
|
||||
def TAILCALLREG64 : TailCallReg<JR64, GPR64Opnd>, ISA_MIPS3_NOT_32R6_64R6,
|
||||
PTR_64;
|
||||
def PseudoIndirectBranch64 : PseudoIndirectBranchBase<JR64, GPR64Opnd>,
|
||||
ISA_MIPS3_NOT_32R6_64R6;
|
||||
}
|
||||
|
||||
def TAILCALLREG64 : TailCallReg<GPR64Opnd>;
|
||||
|
||||
def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>;
|
||||
def PseudoIndirectBranch64 : PseudoIndirectBranchBase<GPR64Opnd>;
|
||||
let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
|
||||
UseIndirectJumpsHazard] in {
|
||||
def TAILCALLREGHB64 : TailCallReg<JR_HB64, GPR64Opnd>,
|
||||
ISA_MIPS32R2_NOT_32R6_64R6, PTR_64;
|
||||
def PseudoIndirectHazardBranch64 : PseudoIndirectBranchBase<JR_HB64,
|
||||
GPR64Opnd>,
|
||||
ISA_MIPS32R2_NOT_32R6_64R6;
|
||||
}
|
||||
|
||||
/// Multiply and Divide Instructions.
|
||||
let AdditionalPredicates = [NotInMicroMips] in {
|
||||
@ -536,6 +555,10 @@ def DMTC2 : MTC3OP<"dmtc2", COP2Opnd, GPR64Opnd, II_DMTC2>, MFC3OP_FM<0x12, 5>,
|
||||
ISA_MIPS3;
|
||||
}
|
||||
|
||||
|
||||
let AdditionalPredicates = [UseIndirectJumpsHazard] in
|
||||
def JALRHB64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR_HB64, RA_64>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Arbitrary patterns that map to one or more instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -843,7 +866,8 @@ let AdditionalPredicates = [NotInMicroMips] in {
|
||||
def : MipsInstAlias<"dext $rt, $rs, $pos, $size",
|
||||
(DEXTU GPR64Opnd:$rt, GPR64Opnd:$rs, uimm5_plus32:$pos,
|
||||
uimm5_plus1:$size), 0>, ISA_MIPS64R2;
|
||||
|
||||
def : MipsInstAlias<"jalr.hb $rs", (JALR_HB64 RA_64, GPR64Opnd:$rs), 1>,
|
||||
ISA_MIPS64;
|
||||
// Two operand (implicit 0 selector) versions:
|
||||
def : MipsInstAlias<"dmtc0 $rt, $rd",
|
||||
(DMTC0 COP0Opnd:$rd, GPR64Opnd:$rt, 0), 0>;
|
||||
|
@ -104,6 +104,16 @@ class JIC64_DESC : JMP_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, GPR64Opnd,
|
||||
|
||||
class LL64_R6_DESC : LL_R6_DESC_BASE<"ll", GPR32Opnd, mem_simm9, II_LL>;
|
||||
class SC64_R6_DESC : SC_R6_DESC_BASE<"sc", GPR32Opnd, II_SC>;
|
||||
|
||||
class JR_HB64_R6_DESC : JR_HB_DESC_BASE<"jr.hb", GPR64Opnd> {
|
||||
bit isBranch = 1;
|
||||
bit isIndirectBranch = 1;
|
||||
bit hasDelaySlot = 1;
|
||||
bit isTerminator=1;
|
||||
bit isBarrier=1;
|
||||
bit isCTI = 1;
|
||||
InstrItinClass Itinerary = II_JR_HB;
|
||||
}
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Instruction Definitions
|
||||
@ -136,6 +146,7 @@ def SCD_R6 : SCD_R6_ENC, SCD_R6_DESC, ISA_MIPS32R6;
|
||||
let DecoderNamespace = "Mips32r6_64r6_GP64" in {
|
||||
def SELEQZ64 : SELEQZ_ENC, SELEQZ64_DESC, ISA_MIPS32R6, GPR_64;
|
||||
def SELNEZ64 : SELNEZ_ENC, SELNEZ64_DESC, ISA_MIPS32R6, GPR_64;
|
||||
def JR_HB64_R6 : JR_HB_R6_ENC, JR_HB64_R6_DESC, ISA_MIPS32R6;
|
||||
}
|
||||
let AdditionalPredicates = [NotInMicroMips],
|
||||
DecoderNamespace = "Mips32r6_64r6_PTR64" in {
|
||||
@ -277,3 +288,22 @@ def : MipsPat<(select (i32 (setne i32:$cond, immz)), immz, i64:$f),
|
||||
def : MipsPat<(select (i32 (seteq i32:$cond, immz)), immz, i64:$f),
|
||||
(SELNEZ64 i64:$f, (SLL64_32 i32:$cond))>,
|
||||
ISA_MIPS64R6;
|
||||
|
||||
// Pseudo instructions
|
||||
|
||||
let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
|
||||
NoIndirectJumpGuards] in {
|
||||
def TAILCALL64R6REG : TailCallRegR6<JALR64, ZERO_64, GPR64Opnd>, ISA_MIPS64R6;
|
||||
def PseudoIndirectBranch64R6 : PseudoIndirectBranchBaseR6<JALR64, ZERO_64,
|
||||
GPR64Opnd>,
|
||||
ISA_MIPS64R6;
|
||||
}
|
||||
|
||||
let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
|
||||
UseIndirectJumpsHazard] in {
|
||||
def TAILCALLHB64R6REG : TailCallReg<JR_HB64_R6, GPR64Opnd>,
|
||||
ISA_MIPS64R6;
|
||||
def PseudoIndrectHazardBranch64R6 : PseudoIndirectBranchBase<JR_HB64_R6,
|
||||
GPR64Opnd>,
|
||||
ISA_MIPS64R6;
|
||||
}
|
||||
|
@ -53,7 +53,7 @@ class DSPInst<string opstr = "">
|
||||
|
||||
class PseudoDSP<dag outs, dag ins, list<dag> pattern,
|
||||
InstrItinClass itin = IIPseudo>
|
||||
: MipsPseudo<outs, ins, pattern, itin>, PredicateControl {
|
||||
: MipsPseudo<outs, ins, pattern, itin> {
|
||||
let InsnPredicates = [HasDSP];
|
||||
}
|
||||
|
||||
|
@ -67,6 +67,7 @@
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
|
||||
@ -1306,13 +1307,13 @@ bool MipsFastISel::fastLowerArguments() {
|
||||
return false;
|
||||
}
|
||||
|
||||
const ArrayRef<MCPhysReg> GPR32ArgRegs = {Mips::A0, Mips::A1, Mips::A2,
|
||||
Mips::A3};
|
||||
const ArrayRef<MCPhysReg> FGR32ArgRegs = {Mips::F12, Mips::F14};
|
||||
const ArrayRef<MCPhysReg> AFGR64ArgRegs = {Mips::D6, Mips::D7};
|
||||
ArrayRef<MCPhysReg>::iterator NextGPR32 = GPR32ArgRegs.begin();
|
||||
ArrayRef<MCPhysReg>::iterator NextFGR32 = FGR32ArgRegs.begin();
|
||||
ArrayRef<MCPhysReg>::iterator NextAFGR64 = AFGR64ArgRegs.begin();
|
||||
std::array<MCPhysReg, 4> GPR32ArgRegs = {{Mips::A0, Mips::A1, Mips::A2,
|
||||
Mips::A3}};
|
||||
std::array<MCPhysReg, 2> FGR32ArgRegs = {{Mips::F12, Mips::F14}};
|
||||
std::array<MCPhysReg, 2> AFGR64ArgRegs = {{Mips::D6, Mips::D7}};
|
||||
auto NextGPR32 = GPR32ArgRegs.begin();
|
||||
auto NextFGR32 = FGR32ArgRegs.begin();
|
||||
auto NextAFGR64 = AFGR64ArgRegs.begin();
|
||||
|
||||
struct AllocatedReg {
|
||||
const TargetRegisterClass *RC;
|
||||
|
@ -3868,7 +3868,7 @@ MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
|
||||
return std::make_pair(0U, nullptr);
|
||||
case 'l': // use the `lo` register to store values
|
||||
// that are no bigger than a word
|
||||
if (VT == MVT::i32)
|
||||
if (VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8)
|
||||
return std::make_pair((unsigned)Mips::LO0, &Mips::LO32RegClass);
|
||||
return std::make_pair((unsigned)Mips::LO0_64, &Mips::LO64RegClass);
|
||||
case 'x': // use the concatenated `hi` and `lo` registers
|
||||
|
@ -128,7 +128,7 @@ class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern,
|
||||
// Mips Pseudo Instructions Format
|
||||
class MipsPseudo<dag outs, dag ins, list<dag> pattern,
|
||||
InstrItinClass itin = IIPseudo> :
|
||||
MipsInst<outs, ins, "", pattern, itin, Pseudo> {
|
||||
MipsInst<outs, ins, "", pattern, itin, Pseudo>, PredicateControl {
|
||||
let isCodeGenOnly = 1;
|
||||
let isPseudo = 1;
|
||||
}
|
||||
@ -136,7 +136,7 @@ class MipsPseudo<dag outs, dag ins, list<dag> pattern,
|
||||
// Mips32/64 Pseudo Instruction Format
|
||||
class PseudoSE<dag outs, dag ins, list<dag> pattern,
|
||||
InstrItinClass itin = IIPseudo> :
|
||||
MipsPseudo<outs, ins, pattern, itin>, PredicateControl {
|
||||
MipsPseudo<outs, ins, pattern, itin> {
|
||||
let EncodingPredicates = [HasStdEnc];
|
||||
}
|
||||
|
||||
|
@ -298,7 +298,6 @@ unsigned MipsInstrInfo::getEquivalentCompactForm(
|
||||
case Mips::JR:
|
||||
case Mips::PseudoReturn:
|
||||
case Mips::PseudoIndirectBranch:
|
||||
case Mips::TAILCALLREG:
|
||||
canUseShortMicroMipsCTI = true;
|
||||
break;
|
||||
}
|
||||
@ -377,18 +376,18 @@ unsigned MipsInstrInfo::getEquivalentCompactForm(
|
||||
// For MIPSR6, the instruction 'jic' can be used for these cases. Some
|
||||
// tools will accept 'jrc reg' as an alias for 'jic 0, $reg'.
|
||||
case Mips::JR:
|
||||
case Mips::PseudoIndirectBranchR6:
|
||||
case Mips::PseudoReturn:
|
||||
case Mips::PseudoIndirectBranch:
|
||||
case Mips::TAILCALLREG:
|
||||
case Mips::TAILCALLR6REG:
|
||||
if (canUseShortMicroMipsCTI)
|
||||
return Mips::JRC16_MM;
|
||||
return Mips::JIC;
|
||||
case Mips::JALRPseudo:
|
||||
return Mips::JIALC;
|
||||
case Mips::JR64:
|
||||
case Mips::PseudoIndirectBranch64R6:
|
||||
case Mips::PseudoReturn64:
|
||||
case Mips::PseudoIndirectBranch64:
|
||||
case Mips::TAILCALLREG64:
|
||||
case Mips::TAILCALL64R6REG:
|
||||
return Mips::JIC64;
|
||||
case Mips::JALR64Pseudo:
|
||||
return Mips::JIALC64;
|
||||
@ -617,6 +616,18 @@ bool MipsInstrInfo::verifyInstruction(const MachineInstr &MI,
|
||||
return verifyInsExtInstruction(MI, ErrInfo, 0, 32, 32, 64, 32, 64);
|
||||
case Mips::DEXTU:
|
||||
return verifyInsExtInstruction(MI, ErrInfo, 32, 64, 0, 32, 32, 64);
|
||||
case Mips::TAILCALLREG:
|
||||
case Mips::PseudoIndirectBranch:
|
||||
case Mips::JR:
|
||||
case Mips::JR64:
|
||||
case Mips::JALR:
|
||||
case Mips::JALR64:
|
||||
case Mips::JALRPseudo:
|
||||
if (!Subtarget.useIndirectJumpsHazard())
|
||||
return true;
|
||||
|
||||
ErrInfo = "invalid instruction when using jump guards!";
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
|
@ -244,7 +244,10 @@ def HasMadd4 : Predicate<"!Subtarget->disableMadd4()">,
|
||||
AssemblerPredicate<"!FeatureMadd4">;
|
||||
def HasMT : Predicate<"Subtarget->hasMT()">,
|
||||
AssemblerPredicate<"FeatureMT">;
|
||||
|
||||
def UseIndirectJumpsHazard : Predicate<"Subtarget->useIndirectJumpsHazard()">,
|
||||
AssemblerPredicate<"FeatureUseIndirectJumpsHazard">;
|
||||
def NoIndirectJumpGuards : Predicate<"!Subtarget->useIndirectJumpsHazard()">,
|
||||
AssemblerPredicate<"!FeatureUseIndirectJumpsHazard">;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Mips GPR size adjectives.
|
||||
// They are mutually exclusive.
|
||||
@ -1540,8 +1543,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,
|
||||
PseudoSE<(outs), (ins calltarget:$target), [], II_J>,
|
||||
PseudoInstExpansion<(JumpInst Opnd:$target)>;
|
||||
|
||||
class TailCallReg<RegisterOperand RO> :
|
||||
PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>;
|
||||
class TailCallReg<Instruction JumpInst, RegisterOperand RO> :
|
||||
PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
|
||||
PseudoInstExpansion<(JumpInst RO:$rs)>;
|
||||
}
|
||||
|
||||
class BAL_BR_Pseudo<Instruction RealInst> :
|
||||
@ -2068,7 +2072,7 @@ def B : UncondBranch<BEQ, brtarget>,
|
||||
AdditionalRequires<[NotInMicroMips]>;
|
||||
|
||||
def JAL : MMRel, JumpLink<"jal", calltarget>, FJ<3>;
|
||||
let AdditionalPredicates = [NotInMicroMips] in {
|
||||
let AdditionalPredicates = [NotInMicroMips, NoIndirectJumpGuards] in {
|
||||
def JALR : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM;
|
||||
def JALRPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR, RA>;
|
||||
}
|
||||
@ -2088,24 +2092,28 @@ def BAL_BR : BAL_BR_Pseudo<BGEZAL>;
|
||||
let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips] in {
|
||||
def TAILCALL : TailCall<J, jmptarget>;
|
||||
}
|
||||
|
||||
def TAILCALLREG : TailCallReg<GPR32Opnd>;
|
||||
let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
|
||||
NoIndirectJumpGuards] in
|
||||
def TAILCALLREG : TailCallReg<JR, GPR32Opnd>, ISA_MIPS1_NOT_32R6_64R6;
|
||||
|
||||
// Indirect branches are matched as PseudoIndirectBranch/PseudoIndirectBranch64
|
||||
// then are expanded to JR, JR64, JALR, or JALR64 depending on the ISA.
|
||||
class PseudoIndirectBranchBase<RegisterOperand RO> :
|
||||
class PseudoIndirectBranchBase<Instruction JumpInst, RegisterOperand RO> :
|
||||
MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)],
|
||||
II_IndirectBranchPseudo> {
|
||||
II_IndirectBranchPseudo>,
|
||||
PseudoInstExpansion<(JumpInst RO:$rs)> {
|
||||
let isTerminator=1;
|
||||
let isBarrier=1;
|
||||
let hasDelaySlot = 1;
|
||||
let isBranch = 1;
|
||||
let isIndirectBranch = 1;
|
||||
bit isCTI = 1;
|
||||
let Predicates = [NotInMips16Mode];
|
||||
}
|
||||
|
||||
def PseudoIndirectBranch : PseudoIndirectBranchBase<GPR32Opnd>;
|
||||
let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
|
||||
NoIndirectJumpGuards] in
|
||||
def PseudoIndirectBranch : PseudoIndirectBranchBase<JR, GPR32Opnd>,
|
||||
ISA_MIPS1_NOT_32R6_64R6;
|
||||
|
||||
// Return instructions are matched as a RetRA instruction, then are expanded
|
||||
// into PseudoReturn/PseudoReturn64 after register allocation. Finally,
|
||||
@ -2278,8 +2286,8 @@ class JALR_HB_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
|
||||
list<dag> Pattern = [];
|
||||
}
|
||||
|
||||
class JR_HB_DESC : InstSE<(outs), (ins), "", [], II_JR_HB, FrmJ>,
|
||||
JR_HB_DESC_BASE<"jr.hb", GPR32Opnd> {
|
||||
class JR_HB_DESC<RegisterOperand RO> :
|
||||
InstSE<(outs), (ins), "", [], II_JR_HB, FrmJ>, JR_HB_DESC_BASE<"jr.hb", RO> {
|
||||
let isBranch=1;
|
||||
let isIndirectBranch=1;
|
||||
let hasDelaySlot=1;
|
||||
@ -2288,8 +2296,9 @@ class JR_HB_DESC : InstSE<(outs), (ins), "", [], II_JR_HB, FrmJ>,
|
||||
bit isCTI = 1;
|
||||
}
|
||||
|
||||
class JALR_HB_DESC : InstSE<(outs), (ins), "", [], II_JALR_HB, FrmJ>,
|
||||
JALR_HB_DESC_BASE<"jalr.hb", GPR32Opnd> {
|
||||
class JALR_HB_DESC<RegisterOperand RO> :
|
||||
InstSE<(outs), (ins), "", [], II_JALR_HB, FrmJ>, JALR_HB_DESC_BASE<"jalr.hb",
|
||||
RO> {
|
||||
let isIndirectBranch=1;
|
||||
let hasDelaySlot=1;
|
||||
bit isCTI = 1;
|
||||
@ -2298,8 +2307,19 @@ class JALR_HB_DESC : InstSE<(outs), (ins), "", [], II_JALR_HB, FrmJ>,
|
||||
class JR_HB_ENC : JR_HB_FM<8>;
|
||||
class JALR_HB_ENC : JALR_HB_FM<9>;
|
||||
|
||||
def JR_HB : JR_HB_DESC, JR_HB_ENC, ISA_MIPS32_NOT_32R6_64R6;
|
||||
def JALR_HB : JALR_HB_DESC, JALR_HB_ENC, ISA_MIPS32;
|
||||
def JR_HB : JR_HB_DESC<GPR32Opnd>, JR_HB_ENC, ISA_MIPS32R2_NOT_32R6_64R6;
|
||||
def JALR_HB : JALR_HB_DESC<GPR32Opnd>, JALR_HB_ENC, ISA_MIPS32;
|
||||
|
||||
let AdditionalPredicates = [NotInMicroMips, UseIndirectJumpsHazard] in
|
||||
def JALRHBPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR_HB, RA>;
|
||||
|
||||
|
||||
let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
|
||||
UseIndirectJumpsHazard] in {
|
||||
def TAILCALLREGHB : TailCallReg<JR_HB, GPR32Opnd>, ISA_MIPS32_NOT_32R6_64R6;
|
||||
def PseudoIndirectHazardBranch : PseudoIndirectBranchBase<JR_HB, GPR32Opnd>,
|
||||
ISA_MIPS32R2_NOT_32R6_64R6;
|
||||
}
|
||||
|
||||
class TLB<string asmstr, InstrItinClass itin = NoItinerary> :
|
||||
InstSE<(outs), (ins), asmstr, [], itin, FrmOther, asmstr>;
|
||||
@ -2433,7 +2453,8 @@ def : MipsInstAlias<"j $rs", (JR GPR32Opnd:$rs), 0>;
|
||||
let Predicates = [NotInMicroMips] in {
|
||||
def : MipsInstAlias<"jalr $rs", (JALR RA, GPR32Opnd:$rs), 0>;
|
||||
}
|
||||
def : MipsInstAlias<"jalr.hb $rs", (JALR_HB RA, GPR32Opnd:$rs), 1>, ISA_MIPS32;
|
||||
def : MipsInstAlias<"jalr.hb $rs", (JALR_HB RA, GPR32Opnd:$rs), 1>,
|
||||
ISA_MIPS32;
|
||||
def : MipsInstAlias<"neg $rt, $rs",
|
||||
(SUB GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>;
|
||||
def : MipsInstAlias<"neg $rt",
|
||||
|
@ -371,11 +371,12 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
|
||||
|
||||
// In NaCl, modifying the sp is not allowed in branch delay slot.
|
||||
// For MIPS32R6, we can skip using a delay slot branch.
|
||||
if (Subtarget.isTargetNaCl() || Subtarget.hasMips32r6())
|
||||
if (Subtarget.isTargetNaCl() ||
|
||||
(Subtarget.hasMips32r6() && !Subtarget.useIndirectJumpsHazard()))
|
||||
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
|
||||
.addReg(Mips::SP).addImm(8);
|
||||
|
||||
if (Subtarget.hasMips32r6()) {
|
||||
if (Subtarget.hasMips32r6() && !Subtarget.useIndirectJumpsHazard()) {
|
||||
const unsigned JICOp =
|
||||
Subtarget.inMicroMipsMode() ? Mips::JIC_MMR6 : Mips::JIC;
|
||||
BuildMI(*BalTgtMBB, Pos, DL, TII->get(JICOp))
|
||||
@ -383,7 +384,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
|
||||
.addImm(0);
|
||||
|
||||
} else {
|
||||
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT);
|
||||
unsigned JROp =
|
||||
Subtarget.useIndirectJumpsHazard()
|
||||
? (Subtarget.hasMips32r6() ? Mips::JR_HB_R6 : Mips::JR_HB)
|
||||
: Mips::JR;
|
||||
BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT);
|
||||
|
||||
if (Subtarget.isTargetNaCl()) {
|
||||
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::NOP));
|
||||
@ -475,7 +480,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
|
||||
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LD), Mips::RA_64)
|
||||
.addReg(Mips::SP_64).addImm(0);
|
||||
|
||||
if (Subtarget.hasMips64r6()) {
|
||||
if (Subtarget.hasMips64r6() && !Subtarget.useIndirectJumpsHazard()) {
|
||||
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
|
||||
.addReg(Mips::SP_64)
|
||||
.addImm(16);
|
||||
@ -483,7 +488,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
|
||||
.addReg(Mips::AT_64)
|
||||
.addImm(0);
|
||||
} else {
|
||||
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64);
|
||||
unsigned JROp =
|
||||
Subtarget.useIndirectJumpsHazard()
|
||||
? (Subtarget.hasMips32r6() ? Mips::JR_HB64_R6 : Mips::JR_HB64)
|
||||
: Mips::JR64;
|
||||
BuildMI(*BalTgtMBB, Pos, DL, TII->get(JROp)).addReg(Mips::AT_64);
|
||||
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
|
||||
.addReg(Mips::SP_64)
|
||||
.addImm(16);
|
||||
|
@ -701,6 +701,77 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
|
||||
SelectionDAG &DAG,
|
||||
const MipsSubtarget &Subtarget) {
|
||||
// Estimate the number of operations the below transform will turn a
|
||||
// constant multiply into. The number is approximately how many powers
|
||||
// of two summed together that the constant can be broken down into.
|
||||
|
||||
SmallVector<APInt, 16> WorkStack(1, C);
|
||||
unsigned Steps = 0;
|
||||
unsigned BitWidth = C.getBitWidth();
|
||||
|
||||
while (!WorkStack.empty()) {
|
||||
APInt Val = WorkStack.pop_back_val();
|
||||
|
||||
if (Val == 0 || Val == 1)
|
||||
continue;
|
||||
|
||||
if (Val.isPowerOf2()) {
|
||||
++Steps;
|
||||
continue;
|
||||
}
|
||||
|
||||
APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
|
||||
APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
|
||||
: APInt(BitWidth, 1) << C.ceilLogBase2();
|
||||
|
||||
if ((Val - Floor).ule(Ceil - Val)) {
|
||||
WorkStack.push_back(Floor);
|
||||
WorkStack.push_back(Val - Floor);
|
||||
++Steps;
|
||||
continue;
|
||||
}
|
||||
|
||||
WorkStack.push_back(Ceil);
|
||||
WorkStack.push_back(Ceil - Val);
|
||||
++Steps;
|
||||
|
||||
// If we have taken more than 12[1] / 8[2] steps to attempt the
|
||||
// optimization for a native sized value, it is more than likely that this
|
||||
// optimization will make things worse.
|
||||
//
|
||||
// [1] MIPS64 requires 6 instructions at most to materialize any constant,
|
||||
// multiplication requires at least 4 cycles, but another cycle (or two)
|
||||
// to retrieve the result from the HI/LO registers.
|
||||
//
|
||||
// [2] For MIPS32, more than 8 steps is expensive as the constant could be
|
||||
// materialized in 2 instructions, multiplication requires at least 4
|
||||
// cycles, but another cycle (or two) to retrieve the result from the
|
||||
// HI/LO registers.
|
||||
|
||||
if (Steps > 12 && (Subtarget.isABI_N32() || Subtarget.isABI_N64()))
|
||||
return false;
|
||||
|
||||
if (Steps > 8 && Subtarget.isABI_O32())
|
||||
return false;
|
||||
}
|
||||
|
||||
// If the value being multiplied is not supported natively, we have to pay
|
||||
// an additional legalization cost, conservatively assume an increase in the
|
||||
// cost of 3 instructions per step. This values for this heuristic were
|
||||
// determined experimentally.
|
||||
unsigned RegisterSize = DAG.getTargetLoweringInfo()
|
||||
.getRegisterType(*DAG.getContext(), VT)
|
||||
.getSizeInBits();
|
||||
Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
|
||||
if (Steps > 27)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
|
||||
EVT ShiftTy, SelectionDAG &DAG) {
|
||||
// Return 0.
|
||||
@ -739,11 +810,13 @@ static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
|
||||
|
||||
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
|
||||
const TargetLowering::DAGCombinerInfo &DCI,
|
||||
const MipsSETargetLowering *TL) {
|
||||
const MipsSETargetLowering *TL,
|
||||
const MipsSubtarget &Subtarget) {
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
|
||||
if (!VT.isVector())
|
||||
if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs(
|
||||
C->getAPIntValue(), VT, DAG, Subtarget))
|
||||
return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT,
|
||||
TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
|
||||
DAG);
|
||||
@ -983,7 +1056,7 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
|
||||
Val = performORCombine(N, DAG, DCI, Subtarget);
|
||||
break;
|
||||
case ISD::MUL:
|
||||
return performMULCombine(N, DAG, DCI, this);
|
||||
return performMULCombine(N, DAG, DCI, this, Subtarget);
|
||||
case ISD::SHL:
|
||||
Val = performSHLCombine(N, DAG, DCI, Subtarget);
|
||||
break;
|
||||
|
@ -72,9 +72,10 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
|
||||
HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16),
|
||||
Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false),
|
||||
HasEVA(false), DisableMadd4(false), HasMT(false),
|
||||
StackAlignOverride(StackAlignOverride), TM(TM), TargetTriple(TT),
|
||||
TSInfo(), InstrInfo(MipsInstrInfo::create(
|
||||
initializeSubtargetDependencies(CPU, FS, TM))),
|
||||
UseIndirectJumpsHazard(false), StackAlignOverride(StackAlignOverride),
|
||||
TM(TM), TargetTriple(TT), TSInfo(),
|
||||
InstrInfo(
|
||||
MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))),
|
||||
FrameLowering(MipsFrameLowering::create(*this)),
|
||||
TLInfo(MipsTargetLowering::create(TM, *this)) {
|
||||
|
||||
@ -107,6 +108,15 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
|
||||
if (hasMips64r6() && InMicroMipsMode)
|
||||
report_fatal_error("microMIPS64R6 is not supported", false);
|
||||
|
||||
|
||||
if (UseIndirectJumpsHazard) {
|
||||
if (InMicroMipsMode)
|
||||
report_fatal_error(
|
||||
"cannot combine indirect jumps with hazard barriers and microMIPS");
|
||||
if (!hasMips32r2())
|
||||
report_fatal_error(
|
||||
"indirect jumps with hazard barriers requires MIPS32R2 or later");
|
||||
}
|
||||
if (hasMips32r6()) {
|
||||
StringRef ISA = hasMips64r6() ? "MIPS64r6" : "MIPS32r6";
|
||||
|
||||
|
@ -152,6 +152,10 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
|
||||
// HasMT -- support MT ASE.
|
||||
bool HasMT;
|
||||
|
||||
// Use hazard variants of the jump register instructions for indirect
|
||||
// function calls and jump tables.
|
||||
bool UseIndirectJumpsHazard;
|
||||
|
||||
// Disable use of the `jal` instruction.
|
||||
bool UseLongCalls = false;
|
||||
|
||||
@ -272,6 +276,9 @@ public:
|
||||
bool disableMadd4() const { return DisableMadd4; }
|
||||
bool hasEVA() const { return HasEVA; }
|
||||
bool hasMT() const { return HasMT; }
|
||||
bool useIndirectJumpsHazard() const {
|
||||
return UseIndirectJumpsHazard && hasMips32r2();
|
||||
}
|
||||
bool useSmallSection() const { return UseSmallSection; }
|
||||
|
||||
bool hasStandardEncoding() const { return !inMips16Mode(); }
|
||||
|
@ -44,6 +44,14 @@ static cl::opt<bool>
|
||||
cl::desc("Disable load/store vectorizer"),
|
||||
cl::init(false), cl::Hidden);
|
||||
|
||||
// TODO: Remove this flag when we are confident with no regressions.
|
||||
static cl::opt<bool> DisableRequireStructuredCFG(
|
||||
"disable-nvptx-require-structured-cfg",
|
||||
cl::desc("Transitional flag to turn off NVPTX's requirement on preserving "
|
||||
"structured CFG. The requirement should be disabled only when "
|
||||
"unexpected regressions happen."),
|
||||
cl::init(false), cl::Hidden);
|
||||
|
||||
namespace llvm {
|
||||
|
||||
void initializeNVVMIntrRangePass(PassRegistry&);
|
||||
@ -108,6 +116,8 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
|
||||
drvInterface = NVPTX::NVCL;
|
||||
else
|
||||
drvInterface = NVPTX::CUDA;
|
||||
if (!DisableRequireStructuredCFG)
|
||||
setRequiresStructuredCFG(true);
|
||||
initAsmInfo();
|
||||
}
|
||||
|
||||
|
@ -12264,6 +12264,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
N->getOperand(1).getValueType() == MVT::i16 ||
|
||||
(Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
|
||||
N->getOperand(1).getValueType() == MVT::i64))) {
|
||||
// STBRX can only handle simple types.
|
||||
EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
|
||||
if (mVT.isExtended())
|
||||
break;
|
||||
|
||||
SDValue BSwapOp = N->getOperand(1).getOperand(0);
|
||||
// Do an any-extend to 32-bits if this is a half-word input.
|
||||
if (BSwapOp.getValueType() == MVT::i16)
|
||||
@ -12271,7 +12276,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
|
||||
// If the type of BSWAP operand is wider than stored memory width
|
||||
// it need to be shifted to the right side before STBRX.
|
||||
EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
|
||||
if (Op1VT.bitsGT(mVT)) {
|
||||
int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
|
||||
BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
|
||||
|
@ -2431,7 +2431,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
|
||||
// Use APInt's rotate function.
|
||||
int64_t SH = MI.getOperand(2).getImm();
|
||||
int64_t MB = MI.getOperand(3).getImm();
|
||||
APInt InVal(Opc == PPC::RLDICL ? 64 : 32, SExtImm, true);
|
||||
APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICLo) ?
|
||||
64 : 32, SExtImm, true);
|
||||
InVal = InVal.rotl(SH);
|
||||
uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
|
||||
InVal &= Mask;
|
||||
@ -2444,6 +2445,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
|
||||
Is64BitLI = Opc != PPC::RLDICL_32;
|
||||
NewImm = InVal.getSExtValue();
|
||||
SetCR = Opc == PPC::RLDICLo;
|
||||
if (SetCR && (SExtImm & NewImm) != NewImm)
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
@ -2471,6 +2474,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
|
||||
Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o;
|
||||
NewImm = InVal.getSExtValue();
|
||||
SetCR = Opc == PPC::RLWINMo || Opc == PPC::RLWINM8o;
|
||||
if (SetCR && (SExtImm & NewImm) != NewImm)
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/TargetRegisterInfo.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/Printable.h"
|
||||
#include <bitset>
|
||||
|
||||
using namespace llvm;
|
||||
@ -262,25 +263,6 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
/// An Instruction Converter which completely deletes an instruction.
|
||||
/// For example, IMPLICIT_DEF instructions can be deleted when converting from
|
||||
/// GPR to mask.
|
||||
class InstrDeleter : public InstrConverterBase {
|
||||
public:
|
||||
InstrDeleter(unsigned SrcOpcode) : InstrConverterBase(SrcOpcode) {}
|
||||
|
||||
bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII,
|
||||
MachineRegisterInfo *MRI) const override {
|
||||
assert(isLegal(MI, TII) && "Cannot convert instruction");
|
||||
return true;
|
||||
}
|
||||
|
||||
double getExtraCost(const MachineInstr *MI,
|
||||
MachineRegisterInfo *MRI) const override {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Key type to be used by the Instruction Converters map.
|
||||
// A converter is identified by <destination domain, source opcode>
|
||||
typedef std::pair<int, unsigned> InstrConverterBaseKeyTy;
|
||||
@ -310,8 +292,12 @@ private:
|
||||
/// Domains which this closure can legally be reassigned to.
|
||||
std::bitset<NumDomains> LegalDstDomains;
|
||||
|
||||
/// An ID to uniquely identify this closure, even when it gets
|
||||
/// moved around
|
||||
unsigned ID;
|
||||
|
||||
public:
|
||||
Closure(std::initializer_list<RegDomain> LegalDstDomainList) {
|
||||
Closure(unsigned ID, std::initializer_list<RegDomain> LegalDstDomainList) : ID(ID) {
|
||||
for (RegDomain D : LegalDstDomainList)
|
||||
LegalDstDomains.set(D);
|
||||
}
|
||||
@ -347,6 +333,27 @@ public:
|
||||
return Instrs;
|
||||
}
|
||||
|
||||
LLVM_DUMP_METHOD void dump(const MachineRegisterInfo *MRI) const {
|
||||
dbgs() << "Registers: ";
|
||||
bool First = true;
|
||||
for (unsigned Reg : Edges) {
|
||||
if (!First)
|
||||
dbgs() << ", ";
|
||||
First = false;
|
||||
dbgs() << printReg(Reg, MRI->getTargetRegisterInfo());
|
||||
}
|
||||
dbgs() << "\n" << "Instructions:";
|
||||
for (MachineInstr *MI : Instrs) {
|
||||
dbgs() << "\n ";
|
||||
MI->print(dbgs());
|
||||
}
|
||||
dbgs() << "\n";
|
||||
}
|
||||
|
||||
unsigned getID() const {
|
||||
return ID;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
class X86DomainReassignment : public MachineFunctionPass {
|
||||
@ -358,7 +365,7 @@ class X86DomainReassignment : public MachineFunctionPass {
|
||||
DenseSet<unsigned> EnclosedEdges;
|
||||
|
||||
/// All instructions that are included in some closure.
|
||||
DenseMap<MachineInstr *, Closure *> EnclosedInstrs;
|
||||
DenseMap<MachineInstr *, unsigned> EnclosedInstrs;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
@ -435,14 +442,14 @@ void X86DomainReassignment::visitRegister(Closure &C, unsigned Reg,
|
||||
void X86DomainReassignment::encloseInstr(Closure &C, MachineInstr *MI) {
|
||||
auto I = EnclosedInstrs.find(MI);
|
||||
if (I != EnclosedInstrs.end()) {
|
||||
if (I->second != &C)
|
||||
if (I->second != C.getID())
|
||||
// Instruction already belongs to another closure, avoid conflicts between
|
||||
// closure and mark this closure as illegal.
|
||||
C.setAllIllegal();
|
||||
return;
|
||||
}
|
||||
|
||||
EnclosedInstrs[MI] = &C;
|
||||
EnclosedInstrs[MI] = C.getID();
|
||||
C.addInstruction(MI);
|
||||
|
||||
// Mark closure as illegal for reassignment to domains, if there is no
|
||||
@ -587,7 +594,7 @@ void X86DomainReassignment::initConverters() {
|
||||
new InstrIgnore(TargetOpcode::PHI);
|
||||
|
||||
Converters[{MaskDomain, TargetOpcode::IMPLICIT_DEF}] =
|
||||
new InstrDeleter(TargetOpcode::IMPLICIT_DEF);
|
||||
new InstrIgnore(TargetOpcode::IMPLICIT_DEF);
|
||||
|
||||
Converters[{MaskDomain, TargetOpcode::INSERT_SUBREG}] =
|
||||
new InstrReplaceWithCopy(TargetOpcode::INSERT_SUBREG, 2);
|
||||
@ -723,6 +730,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
|
||||
std::vector<Closure> Closures;
|
||||
|
||||
// Go over all virtual registers and calculate a closure.
|
||||
unsigned ClosureID = 0;
|
||||
for (unsigned Idx = 0; Idx < MRI->getNumVirtRegs(); ++Idx) {
|
||||
unsigned Reg = TargetRegisterInfo::index2VirtReg(Idx);
|
||||
|
||||
@ -735,7 +743,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
|
||||
continue;
|
||||
|
||||
// Calculate closure starting with Reg.
|
||||
Closure C({MaskDomain});
|
||||
Closure C(ClosureID++, {MaskDomain});
|
||||
buildClosure(C, Reg);
|
||||
|
||||
// Collect all closures that can potentially be converted.
|
||||
@ -743,15 +751,16 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
|
||||
Closures.push_back(std::move(C));
|
||||
}
|
||||
|
||||
for (Closure &C : Closures)
|
||||
for (Closure &C : Closures) {
|
||||
DEBUG(C.dump(MRI));
|
||||
if (isReassignmentProfitable(C, MaskDomain)) {
|
||||
reassign(C, MaskDomain);
|
||||
++NumClosuresConverted;
|
||||
Changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto I : Converters)
|
||||
delete I.second;
|
||||
DeleteContainerSeconds(Converters);
|
||||
|
||||
DEBUG(dbgs() << "***** Machine Function after Domain Reassignment *****\n");
|
||||
DEBUG(MF.print(dbgs()));
|
||||
|
@ -1789,9 +1789,16 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
|
||||
bool X86FastISel::X86SelectShift(const Instruction *I) {
|
||||
unsigned CReg = 0, OpReg = 0;
|
||||
const TargetRegisterClass *RC = nullptr;
|
||||
assert(!I->getType()->isIntegerTy(8) &&
|
||||
"i8 shifts should be handled by autogenerated table");
|
||||
if (I->getType()->isIntegerTy(16)) {
|
||||
if (I->getType()->isIntegerTy(8)) {
|
||||
CReg = X86::CL;
|
||||
RC = &X86::GR8RegClass;
|
||||
switch (I->getOpcode()) {
|
||||
case Instruction::LShr: OpReg = X86::SHR8rCL; break;
|
||||
case Instruction::AShr: OpReg = X86::SAR8rCL; break;
|
||||
case Instruction::Shl: OpReg = X86::SHL8rCL; break;
|
||||
default: return false;
|
||||
}
|
||||
} else if (I->getType()->isIntegerTy(16)) {
|
||||
CReg = X86::CX;
|
||||
RC = &X86::GR16RegClass;
|
||||
switch (I->getOpcode()) {
|
||||
@ -1836,10 +1843,10 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
|
||||
|
||||
// The shift instruction uses X86::CL. If we defined a super-register
|
||||
// of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
|
||||
assert(CReg != X86::CL && "CReg should be a super register of CL");
|
||||
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
||||
TII.get(TargetOpcode::KILL), X86::CL)
|
||||
.addReg(CReg, RegState::Kill);
|
||||
if (CReg != X86::CL)
|
||||
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
|
||||
TII.get(TargetOpcode::KILL), X86::CL)
|
||||
.addReg(CReg, RegState::Kill);
|
||||
|
||||
unsigned ResultReg = createResultReg(RC);
|
||||
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
|
||||
|
@ -127,6 +127,10 @@ private:
|
||||
MachineInstr &JmpI, CondRegArray &CondRegs);
|
||||
void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
|
||||
MachineInstr &CopyDefI);
|
||||
void rewriteSetCarryExtended(MachineBasicBlock &TestMBB,
|
||||
MachineBasicBlock::iterator TestPos,
|
||||
DebugLoc TestLoc, MachineInstr &SetBI,
|
||||
MachineOperand &FlagUse, CondRegArray &CondRegs);
|
||||
void rewriteSetCC(MachineBasicBlock &TestMBB,
|
||||
MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
|
||||
MachineInstr &SetCCI, MachineOperand &FlagUse,
|
||||
@ -511,8 +515,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
} else if (MI.getOpcode() == TargetOpcode::COPY) {
|
||||
rewriteCopy(MI, *FlagUse, CopyDefI);
|
||||
} else {
|
||||
// We assume that arithmetic instructions that use flags also def
|
||||
// them.
|
||||
// We assume all other instructions that use flags also def them.
|
||||
assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
|
||||
"Expected a def of EFLAGS for this instruction!");
|
||||
|
||||
@ -524,7 +527,23 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
// logic.
|
||||
FlagsKilled = true;
|
||||
|
||||
rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
|
||||
switch (MI.getOpcode()) {
|
||||
case X86::SETB_C8r:
|
||||
case X86::SETB_C16r:
|
||||
case X86::SETB_C32r:
|
||||
case X86::SETB_C64r:
|
||||
// Use custom lowering for arithmetic that is merely extending the
|
||||
// carry flag. We model this as the SETB_C* pseudo instructions.
|
||||
rewriteSetCarryExtended(TestMBB, TestPos, TestLoc, MI, *FlagUse,
|
||||
CondRegs);
|
||||
break;
|
||||
|
||||
default:
|
||||
// Generically handle remaining uses as arithmetic instructions.
|
||||
rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse,
|
||||
CondRegs);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@ -636,7 +655,7 @@ void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
|
||||
// also allow us to select a shorter encoding of `testb %reg, %reg` when that
|
||||
// would be equivalent.
|
||||
auto TestI =
|
||||
BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8ri)).addReg(Reg).addImm(-1);
|
||||
BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg);
|
||||
(void)TestI;
|
||||
DEBUG(dbgs() << " test cond: "; TestI->dump());
|
||||
++NumTestsInserted;
|
||||
@ -756,6 +775,126 @@ void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
|
||||
MI.eraseFromParent();
|
||||
}
|
||||
|
||||
void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
|
||||
MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
|
||||
DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
|
||||
CondRegArray &CondRegs) {
|
||||
// This routine is only used to handle pseudos for setting a register to zero
|
||||
// or all ones based on CF. This is essentially the sign extended from 1-bit
|
||||
// form of SETB and modeled with the SETB_C* pseudos. They require special
|
||||
// handling as they aren't normal SETcc instructions and are lowered to an
|
||||
// EFLAGS clobbering operation (SBB typically). One simplifying aspect is that
|
||||
// they are only provided in reg-defining forms. A complicating factor is that
|
||||
// they can define many different register widths.
|
||||
assert(SetBI.getOperand(0).isReg() &&
|
||||
"Cannot have a non-register defined operand to this variant of SETB!");
|
||||
|
||||
// Little helper to do the common final step of replacing the register def'ed
|
||||
// by this SETB instruction with a new register and removing the SETB
|
||||
// instruction.
|
||||
auto RewriteToReg = [&](unsigned Reg) {
|
||||
MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg);
|
||||
SetBI.eraseFromParent();
|
||||
};
|
||||
|
||||
// Grab the register class used for this particular instruction.
|
||||
auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg());
|
||||
|
||||
MachineBasicBlock &MBB = *SetBI.getParent();
|
||||
auto SetPos = SetBI.getIterator();
|
||||
auto SetLoc = SetBI.getDebugLoc();
|
||||
|
||||
auto AdjustReg = [&](unsigned Reg) {
|
||||
auto &OrigRC = *MRI->getRegClass(Reg);
|
||||
if (&OrigRC == &SetBRC)
|
||||
return Reg;
|
||||
|
||||
unsigned NewReg;
|
||||
|
||||
int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8;
|
||||
int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8;
|
||||
assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!");
|
||||
assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!");
|
||||
int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
|
||||
X86::NoSubRegister, X86::sub_32bit};
|
||||
|
||||
// If the original size is smaller than the target *and* is smaller than 4
|
||||
// bytes, we need to explicitly zero extend it. We always extend to 4-bytes
|
||||
// to maximize the chance of being able to CSE that operation and to avoid
|
||||
// partial dependency stalls extending to 2-bytes.
|
||||
if (OrigRegSize < TargetRegSize && OrigRegSize < 4) {
|
||||
NewReg = MRI->createVirtualRegister(&X86::GR32RegClass);
|
||||
BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg)
|
||||
.addReg(Reg);
|
||||
if (&SetBRC == &X86::GR32RegClass)
|
||||
return NewReg;
|
||||
Reg = NewReg;
|
||||
OrigRegSize = 4;
|
||||
}
|
||||
|
||||
NewReg = MRI->createVirtualRegister(&SetBRC);
|
||||
if (OrigRegSize < TargetRegSize) {
|
||||
BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG),
|
||||
NewReg)
|
||||
.addImm(0)
|
||||
.addReg(Reg)
|
||||
.addImm(SubRegIdx[OrigRegSize]);
|
||||
} else if (OrigRegSize > TargetRegSize) {
|
||||
BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::EXTRACT_SUBREG),
|
||||
NewReg)
|
||||
.addReg(Reg)
|
||||
.addImm(SubRegIdx[TargetRegSize]);
|
||||
} else {
|
||||
BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
|
||||
.addReg(Reg);
|
||||
}
|
||||
return NewReg;
|
||||
};
|
||||
|
||||
unsigned &CondReg = CondRegs[X86::COND_B];
|
||||
if (!CondReg)
|
||||
CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B);
|
||||
|
||||
// Adjust the condition to have the desired register width by zero-extending
|
||||
// as needed.
|
||||
// FIXME: We should use a better API to avoid the local reference and using a
|
||||
// different variable here.
|
||||
unsigned ExtCondReg = AdjustReg(CondReg);
|
||||
|
||||
// Now we need to turn this into a bitmask. We do this by subtracting it from
|
||||
// zero.
|
||||
unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
|
||||
BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
|
||||
ZeroReg = AdjustReg(ZeroReg);
|
||||
|
||||
unsigned Sub;
|
||||
switch (SetBI.getOpcode()) {
|
||||
case X86::SETB_C8r:
|
||||
Sub = X86::SUB8rr;
|
||||
break;
|
||||
|
||||
case X86::SETB_C16r:
|
||||
Sub = X86::SUB16rr;
|
||||
break;
|
||||
|
||||
case X86::SETB_C32r:
|
||||
Sub = X86::SUB32rr;
|
||||
break;
|
||||
|
||||
case X86::SETB_C64r:
|
||||
Sub = X86::SUB64rr;
|
||||
break;
|
||||
|
||||
default:
|
||||
llvm_unreachable("Invalid SETB_C* opcode!");
|
||||
}
|
||||
unsigned ResultReg = MRI->createVirtualRegister(&SetBRC);
|
||||
BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
|
||||
.addReg(ZeroReg)
|
||||
.addReg(ExtCondReg);
|
||||
return RewriteToReg(ResultReg);
|
||||
}
|
||||
|
||||
void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
|
||||
MachineBasicBlock::iterator TestPos,
|
||||
DebugLoc TestLoc,
|
||||
|
@ -1334,7 +1334,7 @@ let Predicates = [HasBMI2] in {
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ADCX Instruction
|
||||
// ADCX and ADOX Instructions
|
||||
//
|
||||
let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
|
||||
Constraints = "$src0 = $dst", AddedComplexity = 10 in {
|
||||
@ -1349,6 +1349,15 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
|
||||
[(set GR64:$dst, EFLAGS,
|
||||
(X86adc_flag GR64:$src0, GR64:$src, EFLAGS))],
|
||||
IIC_BIN_CARRY_NONMEM>, T8PD;
|
||||
|
||||
// We don't have patterns for ADOX yet.
|
||||
let hasSideEffects = 0 in {
|
||||
def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src0, GR32:$src),
|
||||
"adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS;
|
||||
|
||||
def ADOX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src0, GR64:$src),
|
||||
"adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS;
|
||||
} // hasSideEffects = 0
|
||||
} // SchedRW
|
||||
|
||||
let mayLoad = 1, SchedRW = [WriteALULd] in {
|
||||
@ -1363,27 +1372,14 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
|
||||
[(set GR64:$dst, EFLAGS,
|
||||
(X86adc_flag GR64:$src0, (loadi64 addr:$src), EFLAGS))],
|
||||
IIC_BIN_CARRY_MEM>, T8PD;
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ADOX Instruction
|
||||
//
|
||||
let Predicates = [HasADX], hasSideEffects = 0, Defs = [EFLAGS],
|
||||
Uses = [EFLAGS] in {
|
||||
let SchedRW = [WriteALU] in {
|
||||
def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
|
||||
"adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS;
|
||||
|
||||
def ADOX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
|
||||
"adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS;
|
||||
} // SchedRW
|
||||
|
||||
let mayLoad = 1, SchedRW = [WriteALULd] in {
|
||||
def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
|
||||
// We don't have patterns for ADOX yet.
|
||||
let hasSideEffects = 0 in {
|
||||
def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src0, i32mem:$src),
|
||||
"adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8XS;
|
||||
|
||||
def ADOX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
|
||||
def ADOX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src0, i64mem:$src),
|
||||
"adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8XS;
|
||||
}
|
||||
} // hasSideEffects = 0
|
||||
}
|
||||
|
@ -847,10 +847,20 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
|
||||
if (CS.getInstruction() == nullptr || !CS.isCallee(&U))
|
||||
return nullptr;
|
||||
|
||||
// Can't change signature of musttail callee
|
||||
if (CS.isMustTailCall())
|
||||
return nullptr;
|
||||
|
||||
if (CS.getInstruction()->getParent()->getParent() == F)
|
||||
isSelfRecursive = true;
|
||||
}
|
||||
|
||||
// Can't change signature of musttail caller
|
||||
// FIXME: Support promoting whole chain of musttail functions
|
||||
for (BasicBlock &BB : *F)
|
||||
if (BB.getTerminatingMustTailCall())
|
||||
return nullptr;
|
||||
|
||||
const DataLayout &DL = F->getParent()->getDataLayout();
|
||||
|
||||
AAResults &AAR = AARGetter(*F);
|
||||
|
@ -507,14 +507,28 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
|
||||
// MaybeLive. Initialized to a list of RetCount empty lists.
|
||||
RetUses MaybeLiveRetUses(RetCount);
|
||||
|
||||
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
|
||||
if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
|
||||
bool HasMustTailCalls = false;
|
||||
|
||||
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
|
||||
if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
|
||||
if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType()
|
||||
!= F.getFunctionType()->getReturnType()) {
|
||||
// We don't support old style multiple return values.
|
||||
MarkLive(F);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// If we have any returns of `musttail` results - the signature can't
|
||||
// change
|
||||
if (BB->getTerminatingMustTailCall() != nullptr)
|
||||
HasMustTailCalls = true;
|
||||
}
|
||||
|
||||
if (HasMustTailCalls) {
|
||||
DEBUG(dbgs() << "DeadArgumentEliminationPass - " << F.getName()
|
||||
<< " has musttail calls\n");
|
||||
}
|
||||
|
||||
if (!F.hasLocalLinkage() && (!ShouldHackArguments || F.isIntrinsic())) {
|
||||
MarkLive(F);
|
||||
@ -526,6 +540,9 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
|
||||
// Keep track of the number of live retvals, so we can skip checks once all
|
||||
// of them turn out to be live.
|
||||
unsigned NumLiveRetVals = 0;
|
||||
|
||||
bool HasMustTailCallers = false;
|
||||
|
||||
// Loop all uses of the function.
|
||||
for (const Use &U : F.uses()) {
|
||||
// If the function is PASSED IN as an argument, its address has been
|
||||
@ -536,6 +553,11 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
|
||||
return;
|
||||
}
|
||||
|
||||
// The number of arguments for `musttail` call must match the number of
|
||||
// arguments of the caller
|
||||
if (CS.isMustTailCall())
|
||||
HasMustTailCallers = true;
|
||||
|
||||
// If this use is anything other than a call site, the function is alive.
|
||||
const Instruction *TheCall = CS.getInstruction();
|
||||
if (!TheCall) { // Not a direct call site?
|
||||
@ -580,6 +602,11 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
|
||||
}
|
||||
}
|
||||
|
||||
if (HasMustTailCallers) {
|
||||
DEBUG(dbgs() << "DeadArgumentEliminationPass - " << F.getName()
|
||||
<< " has musttail callers\n");
|
||||
}
|
||||
|
||||
// Now we've inspected all callers, record the liveness of our return values.
|
||||
for (unsigned i = 0; i != RetCount; ++i)
|
||||
MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]);
|
||||
@ -593,12 +620,19 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
|
||||
for (Function::const_arg_iterator AI = F.arg_begin(),
|
||||
E = F.arg_end(); AI != E; ++AI, ++i) {
|
||||
Liveness Result;
|
||||
if (F.getFunctionType()->isVarArg()) {
|
||||
if (F.getFunctionType()->isVarArg() || HasMustTailCallers ||
|
||||
HasMustTailCalls) {
|
||||
// Variadic functions will already have a va_arg function expanded inside
|
||||
// them, making them potentially very sensitive to ABI changes resulting
|
||||
// from removing arguments entirely, so don't. For example AArch64 handles
|
||||
// register and stack HFAs very differently, and this is reflected in the
|
||||
// IR which has already been generated.
|
||||
//
|
||||
// `musttail` calls to this function restrict argument removal attempts.
|
||||
// The signature of the caller must match the signature of the function.
|
||||
//
|
||||
// `musttail` calls in this function prevents us from changing its
|
||||
// signature
|
||||
Result = Live;
|
||||
} else {
|
||||
// See what the effect of this use is (recording any uses that cause
|
||||
|
@ -2099,8 +2099,31 @@ static void RemoveNestAttribute(Function *F) {
|
||||
/// GHC, or anyregcc.
|
||||
static bool isProfitableToMakeFastCC(Function *F) {
|
||||
CallingConv::ID CC = F->getCallingConv();
|
||||
|
||||
// FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
|
||||
return CC == CallingConv::C || CC == CallingConv::X86_ThisCall;
|
||||
if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
|
||||
return false;
|
||||
|
||||
// FIXME: Change CC for the whole chain of musttail calls when possible.
|
||||
//
|
||||
// Can't change CC of the function that either has musttail calls, or is a
|
||||
// musttail callee itself
|
||||
for (User *U : F->users()) {
|
||||
if (isa<BlockAddress>(U))
|
||||
continue;
|
||||
CallInst* CI = dyn_cast<CallInst>(U);
|
||||
if (!CI)
|
||||
continue;
|
||||
|
||||
if (CI->isMustTailCall())
|
||||
return false;
|
||||
}
|
||||
|
||||
for (BasicBlock &BB : *F)
|
||||
if (BB.getTerminatingMustTailCall())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
|
@ -638,6 +638,19 @@ void MergeFunctions::filterInstsUnrelatedToPDI(
|
||||
DEBUG(dbgs() << " }\n");
|
||||
}
|
||||
|
||||
// Don't merge tiny functions using a thunk, since it can just end up
|
||||
// making the function larger.
|
||||
static bool isThunkProfitable(Function * F) {
|
||||
if (F->size() == 1) {
|
||||
if (F->front().size() <= 2) {
|
||||
DEBUG(dbgs() << "isThunkProfitable: " << F->getName()
|
||||
<< " is too small to bother creating a thunk for\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Replace G with a simple tail call to bitcast(F). Also (unless
|
||||
// MergeFunctionsPDI holds) replace direct uses of G with bitcast(F),
|
||||
// delete G. Under MergeFunctionsPDI, we use G itself for creating
|
||||
@ -647,39 +660,6 @@ void MergeFunctions::filterInstsUnrelatedToPDI(
|
||||
// For better debugability, under MergeFunctionsPDI, we do not modify G's
|
||||
// call sites to point to F even when within the same translation unit.
|
||||
void MergeFunctions::writeThunk(Function *F, Function *G) {
|
||||
if (!G->isInterposable() && !MergeFunctionsPDI) {
|
||||
if (G->hasGlobalUnnamedAddr()) {
|
||||
// G might have been a key in our GlobalNumberState, and it's illegal
|
||||
// to replace a key in ValueMap<GlobalValue *> with a non-global.
|
||||
GlobalNumbers.erase(G);
|
||||
// If G's address is not significant, replace it entirely.
|
||||
Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
|
||||
G->replaceAllUsesWith(BitcastF);
|
||||
} else {
|
||||
// Redirect direct callers of G to F. (See note on MergeFunctionsPDI
|
||||
// above).
|
||||
replaceDirectCallers(G, F);
|
||||
}
|
||||
}
|
||||
|
||||
// If G was internal then we may have replaced all uses of G with F. If so,
|
||||
// stop here and delete G. There's no need for a thunk. (See note on
|
||||
// MergeFunctionsPDI above).
|
||||
if (G->hasLocalLinkage() && G->use_empty() && !MergeFunctionsPDI) {
|
||||
G->eraseFromParent();
|
||||
return;
|
||||
}
|
||||
|
||||
// Don't merge tiny functions using a thunk, since it can just end up
|
||||
// making the function larger.
|
||||
if (F->size() == 1) {
|
||||
if (F->front().size() <= 2) {
|
||||
DEBUG(dbgs() << "writeThunk: " << F->getName()
|
||||
<< " is too small to bother creating a thunk for\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
BasicBlock *GEntryBlock = nullptr;
|
||||
std::vector<Instruction *> PDIUnrelatedWL;
|
||||
BasicBlock *BB = nullptr;
|
||||
@ -754,6 +734,10 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
|
||||
if (F->isInterposable()) {
|
||||
assert(G->isInterposable());
|
||||
|
||||
if (!isThunkProfitable(F)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Make them both thunks to the same internal function.
|
||||
Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
|
||||
F->getParent());
|
||||
@ -770,11 +754,41 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
|
||||
F->setAlignment(MaxAlignment);
|
||||
F->setLinkage(GlobalValue::PrivateLinkage);
|
||||
++NumDoubleWeak;
|
||||
++NumFunctionsMerged;
|
||||
} else {
|
||||
writeThunk(F, G);
|
||||
}
|
||||
// For better debugability, under MergeFunctionsPDI, we do not modify G's
|
||||
// call sites to point to F even when within the same translation unit.
|
||||
if (!G->isInterposable() && !MergeFunctionsPDI) {
|
||||
if (G->hasGlobalUnnamedAddr()) {
|
||||
// G might have been a key in our GlobalNumberState, and it's illegal
|
||||
// to replace a key in ValueMap<GlobalValue *> with a non-global.
|
||||
GlobalNumbers.erase(G);
|
||||
// If G's address is not significant, replace it entirely.
|
||||
Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
|
||||
G->replaceAllUsesWith(BitcastF);
|
||||
} else {
|
||||
// Redirect direct callers of G to F. (See note on MergeFunctionsPDI
|
||||
// above).
|
||||
replaceDirectCallers(G, F);
|
||||
}
|
||||
}
|
||||
|
||||
++NumFunctionsMerged;
|
||||
// If G was internal then we may have replaced all uses of G with F. If so,
|
||||
// stop here and delete G. There's no need for a thunk. (See note on
|
||||
// MergeFunctionsPDI above).
|
||||
if (G->hasLocalLinkage() && G->use_empty() && !MergeFunctionsPDI) {
|
||||
G->eraseFromParent();
|
||||
++NumFunctionsMerged;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!isThunkProfitable(F)) {
|
||||
return;
|
||||
}
|
||||
|
||||
writeThunk(F, G);
|
||||
++NumFunctionsMerged;
|
||||
}
|
||||
}
|
||||
|
||||
/// Replace function F by function G.
|
||||
|
@ -34,6 +34,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "InstCombineInternal.h"
|
||||
#include "llvm-c/Initialization.h"
|
||||
#include "llvm/ADT/APInt.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
@ -1946,13 +1947,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
|
||||
// addrspacecast between types is canonicalized as a bitcast, then an
|
||||
// addrspacecast. To take advantage of the below bitcast + struct GEP, look
|
||||
// through the addrspacecast.
|
||||
Value *ASCStrippedPtrOp = PtrOp;
|
||||
if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(PtrOp)) {
|
||||
// X = bitcast A addrspace(1)* to B addrspace(1)*
|
||||
// Y = addrspacecast A addrspace(1)* to B addrspace(2)*
|
||||
// Z = gep Y, <...constant indices...>
|
||||
// Into an addrspacecasted GEP of the struct.
|
||||
if (BitCastInst *BC = dyn_cast<BitCastInst>(ASC->getOperand(0)))
|
||||
PtrOp = BC;
|
||||
ASCStrippedPtrOp = BC;
|
||||
}
|
||||
|
||||
/// See if we can simplify:
|
||||
@ -1960,7 +1962,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
|
||||
/// Y = gep X, <...constant indices...>
|
||||
/// into a gep of the original struct. This is important for SROA and alias
|
||||
/// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
|
||||
if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
|
||||
if (BitCastInst *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp)) {
|
||||
Value *Operand = BCI->getOperand(0);
|
||||
PointerType *OpType = cast<PointerType>(Operand->getType());
|
||||
unsigned OffsetBits = DL.getPointerTypeSizeInBits(GEP.getType());
|
||||
|
@ -201,6 +201,46 @@ static bool canSplitCallSite(CallSite CS) {
|
||||
return CallSiteBB->canSplitPredecessors();
|
||||
}
|
||||
|
||||
static Instruction *cloneInstForMustTail(Instruction *I, Instruction *Before,
|
||||
Value *V) {
|
||||
Instruction *Copy = I->clone();
|
||||
Copy->setName(I->getName());
|
||||
Copy->insertBefore(Before);
|
||||
if (V)
|
||||
Copy->setOperand(0, V);
|
||||
return Copy;
|
||||
}
|
||||
|
||||
/// Copy mandatory `musttail` return sequence that follows original `CI`, and
|
||||
/// link it up to `NewCI` value instead:
|
||||
///
|
||||
/// * (optional) `bitcast NewCI to ...`
|
||||
/// * `ret bitcast or NewCI`
|
||||
///
|
||||
/// Insert this sequence right before `SplitBB`'s terminator, which will be
|
||||
/// cleaned up later in `splitCallSite` below.
|
||||
static void copyMustTailReturn(BasicBlock *SplitBB, Instruction *CI,
|
||||
Instruction *NewCI) {
|
||||
bool IsVoid = SplitBB->getParent()->getReturnType()->isVoidTy();
|
||||
auto II = std::next(CI->getIterator());
|
||||
|
||||
BitCastInst *BCI = dyn_cast<BitCastInst>(&*II);
|
||||
if (BCI)
|
||||
++II;
|
||||
|
||||
ReturnInst *RI = dyn_cast<ReturnInst>(&*II);
|
||||
assert(RI && "`musttail` call must be followed by `ret` instruction");
|
||||
|
||||
TerminatorInst *TI = SplitBB->getTerminator();
|
||||
Value *V = NewCI;
|
||||
if (BCI)
|
||||
V = cloneInstForMustTail(BCI, TI, V);
|
||||
cloneInstForMustTail(RI, TI, IsVoid ? nullptr : V);
|
||||
|
||||
// FIXME: remove TI here, `DuplicateInstructionsInSplitBetween` has a bug
|
||||
// that prevents doing this now.
|
||||
}
|
||||
|
||||
/// Return true if the CS is split into its new predecessors which are directly
|
||||
/// hooked to each of its original predecessors pointed by PredBB1 and PredBB2.
|
||||
/// CallInst1 and CallInst2 will be the new call-sites placed in the new
|
||||
@ -245,6 +285,7 @@ static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2,
|
||||
Instruction *CallInst1, Instruction *CallInst2) {
|
||||
Instruction *Instr = CS.getInstruction();
|
||||
BasicBlock *TailBB = Instr->getParent();
|
||||
bool IsMustTailCall = CS.isMustTailCall();
|
||||
assert(Instr == (TailBB->getFirstNonPHIOrDbg()) && "Unexpected call-site");
|
||||
|
||||
BasicBlock *SplitBlock1 =
|
||||
@ -276,9 +317,14 @@ static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2,
|
||||
++ArgNo;
|
||||
}
|
||||
}
|
||||
// Clone and place bitcast and return instructions before `TI`
|
||||
if (IsMustTailCall) {
|
||||
copyMustTailReturn(SplitBlock1, CS.getInstruction(), CallInst1);
|
||||
copyMustTailReturn(SplitBlock2, CS.getInstruction(), CallInst2);
|
||||
}
|
||||
|
||||
// Replace users of the original call with a PHI mering call-sites split.
|
||||
if (Instr->getNumUses()) {
|
||||
if (!IsMustTailCall && Instr->getNumUses()) {
|
||||
PHINode *PN = PHINode::Create(Instr->getType(), 2, "phi.call",
|
||||
TailBB->getFirstNonPHI());
|
||||
PN->addIncoming(CallInst1, SplitBlock1);
|
||||
@ -290,8 +336,25 @@ static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2,
|
||||
<< "\n");
|
||||
DEBUG(dbgs() << " " << *CallInst2 << " in " << SplitBlock2->getName()
|
||||
<< "\n");
|
||||
Instr->eraseFromParent();
|
||||
|
||||
NumCallSiteSplit++;
|
||||
|
||||
// FIXME: remove TI in `copyMustTailReturn`
|
||||
if (IsMustTailCall) {
|
||||
// Remove superfluous `br` terminators from the end of the Split blocks
|
||||
// NOTE: Removing terminator removes the SplitBlock from the TailBB's
|
||||
// predecessors. Therefore we must get complete list of Splits before
|
||||
// attempting removal.
|
||||
SmallVector<BasicBlock *, 2> Splits(predecessors((TailBB)));
|
||||
assert(Splits.size() == 2 && "Expected exactly 2 splits!");
|
||||
for (unsigned i = 0; i < Splits.size(); i++)
|
||||
Splits[i]->getTerminator()->eraseFromParent();
|
||||
|
||||
// Erase the tail block once done with musttail patching
|
||||
TailBB->eraseFromParent();
|
||||
return;
|
||||
}
|
||||
Instr->eraseFromParent();
|
||||
}
|
||||
|
||||
// Return true if the call-site has an argument which is a PHI with only
|
||||
@ -369,7 +432,17 @@ static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI) {
|
||||
Function *Callee = CS.getCalledFunction();
|
||||
if (!Callee || Callee->isDeclaration())
|
||||
continue;
|
||||
|
||||
// Successful musttail call-site splits result in erased CI and erased BB.
|
||||
// Check if such path is possible before attempting the splitting.
|
||||
bool IsMustTail = CS.isMustTailCall();
|
||||
|
||||
Changed |= tryToSplitCallSite(CS);
|
||||
|
||||
// There're no interesting instructions after this. The call site
|
||||
// itself might have been erased on splitting.
|
||||
if (IsMustTail)
|
||||
break;
|
||||
}
|
||||
}
|
||||
return Changed;
|
||||
|
@ -13,6 +13,8 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Transforms/Scalar/DivRemPairs.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/MapVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/GlobalsModRef.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
@ -48,7 +50,10 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
|
||||
|
||||
// Insert all divide and remainder instructions into maps keyed by their
|
||||
// operands and opcode (signed or unsigned).
|
||||
DenseMap<DivRemMapKey, Instruction *> DivMap, RemMap;
|
||||
DenseMap<DivRemMapKey, Instruction *> DivMap;
|
||||
// Use a MapVector for RemMap so that instructions are moved/inserted in a
|
||||
// deterministic order.
|
||||
MapVector<DivRemMapKey, Instruction *> RemMap;
|
||||
for (auto &BB : F) {
|
||||
for (auto &I : BB) {
|
||||
if (I.getOpcode() == Instruction::SDiv)
|
||||
@ -67,14 +72,14 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
|
||||
// rare than division.
|
||||
for (auto &RemPair : RemMap) {
|
||||
// Find the matching division instruction from the division map.
|
||||
Instruction *DivInst = DivMap[RemPair.getFirst()];
|
||||
Instruction *DivInst = DivMap[RemPair.first];
|
||||
if (!DivInst)
|
||||
continue;
|
||||
|
||||
// We have a matching pair of div/rem instructions. If one dominates the
|
||||
// other, hoist and/or replace one.
|
||||
NumPairs++;
|
||||
Instruction *RemInst = RemPair.getSecond();
|
||||
Instruction *RemInst = RemPair.second;
|
||||
bool IsSigned = DivInst->getOpcode() == Instruction::SDiv;
|
||||
bool HasDivRemOp = TTI.hasDivRemOp(DivInst->getType(), IsSigned);
|
||||
|
||||
|
@ -1454,6 +1454,9 @@ FindMostPopularDest(BasicBlock *BB,
|
||||
if (PredToDest.second)
|
||||
DestPopularity[PredToDest.second]++;
|
||||
|
||||
if (DestPopularity.empty())
|
||||
return nullptr;
|
||||
|
||||
// Find the most popular dest.
|
||||
DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
|
||||
BasicBlock *MostPopularDest = DPI->first;
|
||||
@ -1629,8 +1632,20 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
|
||||
// threadable destination (the common case) we can avoid this.
|
||||
BasicBlock *MostPopularDest = OnlyDest;
|
||||
|
||||
if (MostPopularDest == MultipleDestSentinel)
|
||||
if (MostPopularDest == MultipleDestSentinel) {
|
||||
// Remove any loop headers from the Dest list, ThreadEdge conservatively
|
||||
// won't process them, but we might have other destination that are eligible
|
||||
// and we still want to process.
|
||||
erase_if(PredToDestList,
|
||||
[&](const std::pair<BasicBlock *, BasicBlock *> &PredToDest) {
|
||||
return LoopHeaders.count(PredToDest.second) != 0;
|
||||
});
|
||||
|
||||
if (PredToDestList.empty())
|
||||
return false;
|
||||
|
||||
MostPopularDest = FindMostPopularDest(BB, PredToDestList);
|
||||
}
|
||||
|
||||
// Now that we know what the most popular destination is, factor all
|
||||
// predecessors that will jump to it into a single predecessor.
|
||||
|
@ -223,6 +223,10 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
|
||||
/// represented here for efficient lookup.
|
||||
SmallPtrSet<Function *, 16> MRVFunctionsTracked;
|
||||
|
||||
/// MustTailFunctions - Each function here is a callee of non-removable
|
||||
/// musttail call site.
|
||||
SmallPtrSet<Function *, 16> MustTailCallees;
|
||||
|
||||
/// TrackingIncomingArguments - This is the set of functions for whose
|
||||
/// arguments we make optimistic assumptions about and try to prove as
|
||||
/// constants.
|
||||
@ -289,6 +293,18 @@ public:
|
||||
TrackedRetVals.insert(std::make_pair(F, LatticeVal()));
|
||||
}
|
||||
|
||||
/// AddMustTailCallee - If the SCCP solver finds that this function is called
|
||||
/// from non-removable musttail call site.
|
||||
void AddMustTailCallee(Function *F) {
|
||||
MustTailCallees.insert(F);
|
||||
}
|
||||
|
||||
/// Returns true if the given function is called from non-removable musttail
|
||||
/// call site.
|
||||
bool isMustTailCallee(Function *F) {
|
||||
return MustTailCallees.count(F);
|
||||
}
|
||||
|
||||
void AddArgumentTrackedFunction(Function *F) {
|
||||
TrackingIncomingArguments.insert(F);
|
||||
}
|
||||
@ -358,6 +374,12 @@ public:
|
||||
return MRVFunctionsTracked;
|
||||
}
|
||||
|
||||
/// getMustTailCallees - Get the set of functions which are called
|
||||
/// from non-removable musttail call sites.
|
||||
const SmallPtrSet<Function *, 16> getMustTailCallees() {
|
||||
return MustTailCallees;
|
||||
}
|
||||
|
||||
/// markOverdefined - Mark the specified value overdefined. This
|
||||
/// works with both scalars and structs.
|
||||
void markOverdefined(Value *V) {
|
||||
@ -1672,6 +1694,23 @@ static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) {
|
||||
IV.isConstant() ? IV.getConstant() : UndefValue::get(V->getType());
|
||||
}
|
||||
assert(Const && "Constant is nullptr here!");
|
||||
|
||||
// Replacing `musttail` instructions with constant breaks `musttail` invariant
|
||||
// unless the call itself can be removed
|
||||
CallInst *CI = dyn_cast<CallInst>(V);
|
||||
if (CI && CI->isMustTailCall() && !isInstructionTriviallyDead(CI)) {
|
||||
CallSite CS(CI);
|
||||
Function *F = CS.getCalledFunction();
|
||||
|
||||
// Don't zap returns of the callee
|
||||
if (F)
|
||||
Solver.AddMustTailCallee(F);
|
||||
|
||||
DEBUG(dbgs() << " Can\'t treat the result of musttail call : " << *CI
|
||||
<< " as a constant\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << " Constant: " << *Const << " = " << *V << '\n');
|
||||
|
||||
// Replaces all of the uses of a variable with uses of the constant.
|
||||
@ -1802,10 +1841,26 @@ static void findReturnsToZap(Function &F,
|
||||
if (!Solver.isArgumentTrackedFunction(&F))
|
||||
return;
|
||||
|
||||
for (BasicBlock &BB : F)
|
||||
// There is a non-removable musttail call site of this function. Zapping
|
||||
// returns is not allowed.
|
||||
if (Solver.isMustTailCallee(&F)) {
|
||||
DEBUG(dbgs() << "Can't zap returns of the function : " << F.getName()
|
||||
<< " due to present musttail call of it\n");
|
||||
return;
|
||||
}
|
||||
|
||||
for (BasicBlock &BB : F) {
|
||||
if (CallInst *CI = BB.getTerminatingMustTailCall()) {
|
||||
DEBUG(dbgs() << "Can't zap return of the block due to present "
|
||||
<< "musttail call : " << *CI << "\n");
|
||||
(void)CI;
|
||||
return;
|
||||
}
|
||||
|
||||
if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
|
||||
if (!isa<UndefValue>(RI->getOperand(0)))
|
||||
ReturnsToZap.push_back(RI);
|
||||
}
|
||||
}
|
||||
|
||||
static bool runIPSCCP(Module &M, const DataLayout &DL,
|
||||
|
@ -710,7 +710,7 @@ int FunctionComparator::cmpInlineAsm(const InlineAsm *L,
|
||||
return Res;
|
||||
if (int Res = cmpNumbers(L->getDialect(), R->getDialect()))
|
||||
return Res;
|
||||
llvm_unreachable("InlineAsm blocks were not uniqued.");
|
||||
assert(L->getFunctionType() != R->getFunctionType());
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -326,6 +326,10 @@ def warn_drv_unsupported_abicalls : Warning<
|
||||
"ignoring '-mabicalls' option as it cannot be used with "
|
||||
"non position-independent code and the N64 ABI">,
|
||||
InGroup<OptionIgnored>;
|
||||
def err_drv_unsupported_indirect_jump_opt : Error<
|
||||
"'-mindirect-jump=%0' is unsupported with the '%1' architecture">;
|
||||
def err_drv_unknown_indirect_jump_opt : Error<
|
||||
"unknown '-mindirect-jump=' option '%0'">;
|
||||
|
||||
def warn_drv_unable_to_find_directory_expected : Warning<
|
||||
"unable to find %0 directory, expected to be in '%1'">,
|
||||
|
@ -759,6 +759,10 @@ def warn_cxx_ms_struct :
|
||||
Warning<"ms_struct may not produce Microsoft-compatible layouts for classes "
|
||||
"with base classes or virtual functions">,
|
||||
DefaultError, InGroup<IncompatibleMSStruct>;
|
||||
def warn_npot_ms_struct :
|
||||
Warning<"ms_struct may not produce Microsoft-compatible layouts with fundamental "
|
||||
"data types with sizes that aren't a power of two">,
|
||||
DefaultError, InGroup<IncompatibleMSStruct>;
|
||||
def err_section_conflict : Error<"%0 causes a section type conflict with %1">;
|
||||
def err_no_base_classes : Error<"invalid use of '__super', %0 has no base classes">;
|
||||
def err_invalid_super_scope : Error<"invalid use of '__super', "
|
||||
|
@ -238,7 +238,7 @@ def _SLASH_Fo : CLCompileJoined<"Fo">,
|
||||
def _SLASH_GX : CLFlag<"GX">,
|
||||
HelpText<"Enable exception handling">;
|
||||
def _SLASH_GX_ : CLFlag<"GX-">,
|
||||
HelpText<"Enable exception handling">;
|
||||
HelpText<"Disable exception handling">;
|
||||
def _SLASH_imsvc : CLJoinedOrSeparate<"imsvc">,
|
||||
HelpText<"Add directory to system include search path, as if part of %INCLUDE%">,
|
||||
MetaVarName<"<dir>">;
|
||||
|
@ -1100,7 +1100,8 @@ def fthinlto_index_EQ : Joined<["-"], "fthinlto-index=">,
|
||||
HelpText<"Perform ThinLTO importing using provided function summary index">;
|
||||
def fmacro_backtrace_limit_EQ : Joined<["-"], "fmacro-backtrace-limit=">,
|
||||
Group<f_Group>, Flags<[DriverOption, CoreOption]>;
|
||||
def fmerge_all_constants : Flag<["-"], "fmerge-all-constants">, Group<f_Group>;
|
||||
def fmerge_all_constants : Flag<["-"], "fmerge-all-constants">, Group<f_Group>,
|
||||
Flags<[CC1Option]>, HelpText<"Allow merging of constants">;
|
||||
def fmessage_length_EQ : Joined<["-"], "fmessage-length=">, Group<f_Group>;
|
||||
def fms_extensions : Flag<["-"], "fms-extensions">, Group<f_Group>, Flags<[CC1Option, CoreOption]>,
|
||||
HelpText<"Accept some non-standard constructs supported by the Microsoft compiler">;
|
||||
@ -1249,7 +1250,7 @@ def fveclib : Joined<["-"], "fveclib=">, Group<f_Group>, Flags<[CC1Option]>,
|
||||
def fno_lax_vector_conversions : Flag<["-"], "fno-lax-vector-conversions">, Group<f_Group>,
|
||||
HelpText<"Disallow implicit conversions between vectors with a different number of elements or different element types">, Flags<[CC1Option]>;
|
||||
def fno_merge_all_constants : Flag<["-"], "fno-merge-all-constants">, Group<f_Group>,
|
||||
Flags<[CC1Option]>, HelpText<"Disallow merging of constants">;
|
||||
HelpText<"Disallow merging of constants">;
|
||||
def fno_modules : Flag <["-"], "fno-modules">, Group<f_Group>,
|
||||
Flags<[DriverOption]>;
|
||||
def fno_implicit_module_maps : Flag <["-"], "fno-implicit-module-maps">, Group<f_Group>,
|
||||
@ -1992,6 +1993,9 @@ def mbranch_likely : Flag<["-"], "mbranch-likely">, Group<m_Group>,
|
||||
IgnoredGCCCompat;
|
||||
def mno_branch_likely : Flag<["-"], "mno-branch-likely">, Group<m_Group>,
|
||||
IgnoredGCCCompat;
|
||||
def mindirect_jump_EQ : Joined<["-"], "mindirect-jump=">,
|
||||
Group<m_Group>,
|
||||
HelpText<"Change indirect jump instructions to inhibit speculation">;
|
||||
def mdsp : Flag<["-"], "mdsp">, Group<m_Group>;
|
||||
def mno_dsp : Flag<["-"], "mno-dsp">, Group<m_Group>;
|
||||
def mdspr2 : Flag<["-"], "mdspr2">, Group<m_Group>;
|
||||
|
@ -61,14 +61,22 @@ namespace {
|
||||
|
||||
static QualType getType(APValue::LValueBase B) {
|
||||
if (!B) return QualType();
|
||||
if (const ValueDecl *D = B.dyn_cast<const ValueDecl*>())
|
||||
if (const ValueDecl *D = B.dyn_cast<const ValueDecl*>()) {
|
||||
// FIXME: It's unclear where we're supposed to take the type from, and
|
||||
// this actually matters for arrays of unknown bound. Using the type of
|
||||
// the most recent declaration isn't clearly correct in general. Eg:
|
||||
// this actually matters for arrays of unknown bound. Eg:
|
||||
//
|
||||
// extern int arr[]; void f() { extern int arr[3]; };
|
||||
// constexpr int *p = &arr[1]; // valid?
|
||||
return cast<ValueDecl>(D->getMostRecentDecl())->getType();
|
||||
//
|
||||
// For now, we take the array bound from the most recent declaration.
|
||||
for (auto *Redecl = cast<ValueDecl>(D->getMostRecentDecl()); Redecl;
|
||||
Redecl = cast_or_null<ValueDecl>(Redecl->getPreviousDecl())) {
|
||||
QualType T = Redecl->getType();
|
||||
if (!T->isIncompleteArrayType())
|
||||
return T;
|
||||
}
|
||||
return D->getType();
|
||||
}
|
||||
|
||||
const Expr *Base = B.get<const Expr*>();
|
||||
|
||||
@ -8535,9 +8543,6 @@ bool IntExprEvaluator::VisitBinaryOperator(const BinaryOperator *E) {
|
||||
(LHSValue.Base && isZeroSized(RHSValue)))
|
||||
return Error(E);
|
||||
// Pointers with different bases cannot represent the same object.
|
||||
// (Note that clang defaults to -fmerge-all-constants, which can
|
||||
// lead to inconsistent results for comparisons involving the address
|
||||
// of a constant; this generally doesn't matter in practice.)
|
||||
return Success(E->getOpcode() == BO_NE, E);
|
||||
}
|
||||
|
||||
|
@ -1751,7 +1751,34 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
|
||||
QualType T = Context.getBaseElementType(D->getType());
|
||||
if (const BuiltinType *BTy = T->getAs<BuiltinType>()) {
|
||||
CharUnits TypeSize = Context.getTypeSizeInChars(BTy);
|
||||
if (TypeSize > FieldAlign)
|
||||
|
||||
if (!llvm::isPowerOf2_64(TypeSize.getQuantity())) {
|
||||
assert(
|
||||
!Context.getTargetInfo().getTriple().isWindowsMSVCEnvironment() &&
|
||||
"Non PowerOf2 size in MSVC mode");
|
||||
// Base types with sizes that aren't a power of two don't work
|
||||
// with the layout rules for MS structs. This isn't an issue in
|
||||
// MSVC itself since there are no such base data types there.
|
||||
// On e.g. x86_32 mingw and linux, long double is 12 bytes though.
|
||||
// Any structs involving that data type obviously can't be ABI
|
||||
// compatible with MSVC regardless of how it is laid out.
|
||||
|
||||
// Since ms_struct can be mass enabled (via a pragma or via the
|
||||
// -mms-bitfields command line parameter), this can trigger for
|
||||
// structs that don't actually need MSVC compatibility, so we
|
||||
// need to be able to sidestep the ms_struct layout for these types.
|
||||
|
||||
// Since the combination of -mms-bitfields together with structs
|
||||
// like max_align_t (which contains a long double) for mingw is
|
||||
// quite comon (and GCC handles it silently), just handle it
|
||||
// silently there. For other targets that have ms_struct enabled
|
||||
// (most probably via a pragma or attribute), trigger a diagnostic
|
||||
// that defaults to an error.
|
||||
if (!Context.getTargetInfo().getTriple().isWindowsGNUEnvironment())
|
||||
Diag(D->getLocation(), diag::warn_npot_ms_struct);
|
||||
}
|
||||
if (TypeSize > FieldAlign &&
|
||||
llvm::isPowerOf2_64(TypeSize.getQuantity()))
|
||||
FieldAlign = TypeSize;
|
||||
}
|
||||
}
|
||||
|
@ -299,7 +299,40 @@ ArrayRef<const char *> AArch64TargetInfo::getGCCRegNames() const {
|
||||
}
|
||||
|
||||
const TargetInfo::GCCRegAlias AArch64TargetInfo::GCCRegAliases[] = {
|
||||
{{"w31"}, "wsp"}, {{"x29"}, "fp"}, {{"x30"}, "lr"}, {{"x31"}, "sp"},
|
||||
{{"w31"}, "wsp"},
|
||||
{{"x31"}, "sp"},
|
||||
// GCC rN registers are aliases of xN registers.
|
||||
{{"r0"}, "x0"},
|
||||
{{"r1"}, "x1"},
|
||||
{{"r2"}, "x2"},
|
||||
{{"r3"}, "x3"},
|
||||
{{"r4"}, "x4"},
|
||||
{{"r5"}, "x5"},
|
||||
{{"r6"}, "x6"},
|
||||
{{"r7"}, "x7"},
|
||||
{{"r8"}, "x8"},
|
||||
{{"r9"}, "x9"},
|
||||
{{"r10"}, "x10"},
|
||||
{{"r11"}, "x11"},
|
||||
{{"r12"}, "x12"},
|
||||
{{"r13"}, "x13"},
|
||||
{{"r14"}, "x14"},
|
||||
{{"r15"}, "x15"},
|
||||
{{"r16"}, "x16"},
|
||||
{{"r17"}, "x17"},
|
||||
{{"r18"}, "x18"},
|
||||
{{"r19"}, "x19"},
|
||||
{{"r20"}, "x20"},
|
||||
{{"r21"}, "x21"},
|
||||
{{"r22"}, "x22"},
|
||||
{{"r23"}, "x23"},
|
||||
{{"r24"}, "x24"},
|
||||
{{"r25"}, "x25"},
|
||||
{{"r26"}, "x26"},
|
||||
{{"r27"}, "x27"},
|
||||
{{"r28"}, "x28"},
|
||||
{{"r29", "x29"}, "fp"},
|
||||
{{"r30", "x30"}, "lr"},
|
||||
// The S/D/Q and W/X registers overlap, but aren't really aliases; we
|
||||
// don't want to substitute one of these for a different-sized one.
|
||||
};
|
||||
|
@ -54,6 +54,7 @@ class LLVM_LIBRARY_VISIBILITY MipsTargetInfo : public TargetInfo {
|
||||
enum DspRevEnum { NoDSP, DSP1, DSP2 } DspRev;
|
||||
bool HasMSA;
|
||||
bool DisableMadd4;
|
||||
bool UseIndirectJumpHazard;
|
||||
|
||||
protected:
|
||||
bool HasFP64;
|
||||
@ -64,7 +65,8 @@ public:
|
||||
: TargetInfo(Triple), IsMips16(false), IsMicromips(false),
|
||||
IsNan2008(false), IsAbs2008(false), IsSingleFloat(false),
|
||||
IsNoABICalls(false), CanUseBSDABICalls(false), FloatABI(HardFloat),
|
||||
DspRev(NoDSP), HasMSA(false), DisableMadd4(false), HasFP64(false) {
|
||||
DspRev(NoDSP), HasMSA(false), DisableMadd4(false),
|
||||
UseIndirectJumpHazard(false), HasFP64(false) {
|
||||
TheCXXABI.set(TargetCXXABI::GenericMIPS);
|
||||
|
||||
setABI((getTriple().getArch() == llvm::Triple::mips ||
|
||||
@ -338,6 +340,8 @@ public:
|
||||
IsAbs2008 = false;
|
||||
else if (Feature == "+noabicalls")
|
||||
IsNoABICalls = true;
|
||||
else if (Feature == "+use-indirect-jump-hazard")
|
||||
UseIndirectJumpHazard = true;
|
||||
}
|
||||
|
||||
setDataLayout();
|
||||
|
@ -36,7 +36,7 @@ std::string getClangRepositoryPath() {
|
||||
|
||||
// If the SVN_REPOSITORY is empty, try to use the SVN keyword. This helps us
|
||||
// pick up a tag in an SVN export, for example.
|
||||
StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_600/final/lib/Basic/Version.cpp $");
|
||||
StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_601/final/lib/Basic/Version.cpp $");
|
||||
if (URL.empty()) {
|
||||
URL = SVNRepository.slice(SVNRepository.find(':'),
|
||||
SVNRepository.find("/lib/Basic"));
|
||||
|
@ -1931,13 +1931,8 @@ void X86_32TargetCodeGenInfo::setTargetAttributes(
|
||||
return;
|
||||
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
|
||||
if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
|
||||
// Get the LLVM function.
|
||||
llvm::Function *Fn = cast<llvm::Function>(GV);
|
||||
|
||||
// Now add the 'alignstack' attribute with a value of 16.
|
||||
llvm::AttrBuilder B;
|
||||
B.addStackAlignmentAttr(16);
|
||||
Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
|
||||
Fn->addFnAttr("stackrealign");
|
||||
}
|
||||
if (FD->hasAttr<AnyX86InterruptAttr>()) {
|
||||
llvm::Function *Fn = cast<llvm::Function>(GV);
|
||||
@ -2292,13 +2287,8 @@ public:
|
||||
return;
|
||||
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
|
||||
if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
|
||||
// Get the LLVM function.
|
||||
auto *Fn = cast<llvm::Function>(GV);
|
||||
|
||||
// Now add the 'alignstack' attribute with a value of 16.
|
||||
llvm::AttrBuilder B;
|
||||
B.addStackAlignmentAttr(16);
|
||||
Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
|
||||
llvm::Function *Fn = cast<llvm::Function>(GV);
|
||||
Fn->addFnAttr("stackrealign");
|
||||
}
|
||||
if (FD->hasAttr<AnyX86InterruptAttr>()) {
|
||||
llvm::Function *Fn = cast<llvm::Function>(GV);
|
||||
@ -2429,13 +2419,8 @@ void WinX86_64TargetCodeGenInfo::setTargetAttributes(
|
||||
return;
|
||||
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
|
||||
if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
|
||||
// Get the LLVM function.
|
||||
auto *Fn = cast<llvm::Function>(GV);
|
||||
|
||||
// Now add the 'alignstack' attribute with a value of 16.
|
||||
llvm::AttrBuilder B;
|
||||
B.addStackAlignmentAttr(16);
|
||||
Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
|
||||
llvm::Function *Fn = cast<llvm::Function>(GV);
|
||||
Fn->addFnAttr("stackrealign");
|
||||
}
|
||||
if (FD->hasAttr<AnyX86InterruptAttr>()) {
|
||||
llvm::Function *Fn = cast<llvm::Function>(GV);
|
||||
|
@ -858,11 +858,14 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
|
||||
: std::move(*CLOptions));
|
||||
if (HasConfigFile)
|
||||
for (auto *Opt : *CLOptions) {
|
||||
if (Opt->getOption().matches(options::OPT_config))
|
||||
continue;
|
||||
unsigned Index = Args.MakeIndex(Opt->getSpelling());
|
||||
const Arg *BaseArg = &Opt->getBaseArg();
|
||||
if (BaseArg == Opt)
|
||||
BaseArg = nullptr;
|
||||
Arg *Copy = new llvm::opt::Arg(Opt->getOption(), Opt->getSpelling(),
|
||||
Args.size(), BaseArg);
|
||||
Index, BaseArg);
|
||||
Copy->getValues() = Opt->getValues();
|
||||
if (Opt->isClaimed())
|
||||
Copy->claim();
|
||||
|
@ -343,6 +343,28 @@ void mips::getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple,
|
||||
AddTargetFeature(Args, Features, options::OPT_mno_madd4, options::OPT_mmadd4,
|
||||
"nomadd4");
|
||||
AddTargetFeature(Args, Features, options::OPT_mmt, options::OPT_mno_mt, "mt");
|
||||
|
||||
if (Arg *A = Args.getLastArg(options::OPT_mindirect_jump_EQ)) {
|
||||
StringRef Val = StringRef(A->getValue());
|
||||
if (Val == "hazard") {
|
||||
Arg *B =
|
||||
Args.getLastArg(options::OPT_mmicromips, options::OPT_mno_micromips);
|
||||
Arg *C = Args.getLastArg(options::OPT_mips16, options::OPT_mno_mips16);
|
||||
|
||||
if (B && B->getOption().matches(options::OPT_mmicromips))
|
||||
D.Diag(diag::err_drv_unsupported_indirect_jump_opt)
|
||||
<< "hazard" << "micromips";
|
||||
else if (C && C->getOption().matches(options::OPT_mips16))
|
||||
D.Diag(diag::err_drv_unsupported_indirect_jump_opt)
|
||||
<< "hazard" << "mips16";
|
||||
else if (mips::supportsIndirectJumpHazardBarrier(CPUName))
|
||||
Features.push_back("+use-indirect-jump-hazard");
|
||||
else
|
||||
D.Diag(diag::err_drv_unsupported_indirect_jump_opt)
|
||||
<< "hazard" << CPUName;
|
||||
} else
|
||||
D.Diag(diag::err_drv_unknown_indirect_jump_opt) << Val;
|
||||
}
|
||||
}
|
||||
|
||||
mips::IEEE754Standard mips::getIEEE754Standard(StringRef &CPU) {
|
||||
@ -447,3 +469,20 @@ bool mips::shouldUseFPXX(const ArgList &Args, const llvm::Triple &Triple,
|
||||
|
||||
return UseFPXX;
|
||||
}
|
||||
|
||||
bool mips::supportsIndirectJumpHazardBarrier(StringRef &CPU) {
|
||||
// Supporting the hazard barrier method of dealing with indirect
|
||||
// jumps requires MIPSR2 support.
|
||||
return llvm::StringSwitch<bool>(CPU)
|
||||
.Case("mips32r2", true)
|
||||
.Case("mips32r3", true)
|
||||
.Case("mips32r5", true)
|
||||
.Case("mips32r6", true)
|
||||
.Case("mips64r2", true)
|
||||
.Case("mips64r3", true)
|
||||
.Case("mips64r5", true)
|
||||
.Case("mips64r6", true)
|
||||
.Case("octeon", true)
|
||||
.Case("p5600", true)
|
||||
.Default(false);
|
||||
}
|
||||
|
@ -53,6 +53,7 @@ bool isFPXXDefault(const llvm::Triple &Triple, StringRef CPUName,
|
||||
bool shouldUseFPXX(const llvm::opt::ArgList &Args, const llvm::Triple &Triple,
|
||||
StringRef CPUName, StringRef ABIName,
|
||||
mips::FloatABI FloatABI);
|
||||
bool supportsIndirectJumpHazardBarrier(StringRef &CPU);
|
||||
|
||||
} // end namespace mips
|
||||
} // end namespace target
|
||||
|
@ -3288,9 +3288,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
|
||||
|
||||
Args.AddLastArg(CmdArgs, options::OPT_fveclib);
|
||||
|
||||
if (!Args.hasFlag(options::OPT_fmerge_all_constants,
|
||||
options::OPT_fno_merge_all_constants))
|
||||
CmdArgs.push_back("-fno-merge-all-constants");
|
||||
if (Args.hasFlag(options::OPT_fmerge_all_constants,
|
||||
options::OPT_fno_merge_all_constants, false))
|
||||
CmdArgs.push_back("-fmerge-all-constants");
|
||||
|
||||
// LLVM Code Generator Options.
|
||||
|
||||
|
@ -127,7 +127,8 @@ void tools::CrossWindows::Linker::ConstructJob(
|
||||
}
|
||||
|
||||
CmdArgs.push_back("-shared");
|
||||
CmdArgs.push_back("-Bdynamic");
|
||||
CmdArgs.push_back(Args.hasArg(options::OPT_static) ? "-Bstatic"
|
||||
: "-Bdynamic");
|
||||
|
||||
CmdArgs.push_back("--enable-auto-image-base");
|
||||
|
||||
|
@ -141,22 +141,21 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA,
|
||||
CmdArgs.push_back("console");
|
||||
}
|
||||
|
||||
if (Args.hasArg(options::OPT_mdll))
|
||||
CmdArgs.push_back("--dll");
|
||||
else if (Args.hasArg(options::OPT_shared))
|
||||
CmdArgs.push_back("--shared");
|
||||
if (Args.hasArg(options::OPT_static))
|
||||
CmdArgs.push_back("-Bstatic");
|
||||
else {
|
||||
if (Args.hasArg(options::OPT_mdll))
|
||||
CmdArgs.push_back("--dll");
|
||||
else if (Args.hasArg(options::OPT_shared))
|
||||
CmdArgs.push_back("--shared");
|
||||
else
|
||||
CmdArgs.push_back("-Bdynamic");
|
||||
if (Args.hasArg(options::OPT_mdll) || Args.hasArg(options::OPT_shared)) {
|
||||
CmdArgs.push_back("-e");
|
||||
if (TC.getArch() == llvm::Triple::x86)
|
||||
CmdArgs.push_back("_DllMainCRTStartup@12");
|
||||
else
|
||||
CmdArgs.push_back("DllMainCRTStartup");
|
||||
CmdArgs.push_back("--enable-auto-image-base");
|
||||
}
|
||||
if (Args.hasArg(options::OPT_mdll) || Args.hasArg(options::OPT_shared)) {
|
||||
CmdArgs.push_back("-e");
|
||||
if (TC.getArch() == llvm::Triple::x86)
|
||||
CmdArgs.push_back("_DllMainCRTStartup@12");
|
||||
else
|
||||
CmdArgs.push_back("DllMainCRTStartup");
|
||||
CmdArgs.push_back("--enable-auto-image-base");
|
||||
}
|
||||
|
||||
CmdArgs.push_back("-o");
|
||||
|
@ -1259,6 +1259,7 @@ ASTUnit::getMainBufferWithPrecompiledPreamble(
|
||||
Preamble.reset();
|
||||
PreambleDiagnostics.clear();
|
||||
TopLevelDeclsInPreamble.clear();
|
||||
PreambleSrcLocCache.clear();
|
||||
PreambleRebuildCounter = 1;
|
||||
}
|
||||
}
|
||||
|
@ -552,7 +552,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
|
||||
Args.hasFlag(OPT_ffine_grained_bitfield_accesses,
|
||||
OPT_fno_fine_grained_bitfield_accesses, false);
|
||||
Opts.DwarfDebugFlags = Args.getLastArgValue(OPT_dwarf_debug_flags);
|
||||
Opts.MergeAllConstants = !Args.hasArg(OPT_fno_merge_all_constants);
|
||||
Opts.MergeAllConstants = Args.hasArg(OPT_fmerge_all_constants);
|
||||
Opts.NoCommon = Args.hasArg(OPT_fno_common);
|
||||
Opts.NoImplicitFloat = Args.hasArg(OPT_no_implicit_float);
|
||||
Opts.OptimizeSize = getOptimizationLevelSize(Args);
|
||||
|
@ -54,23 +54,23 @@ _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_popcnt_epi16(__m128i __A)
|
||||
_mm_popcnt_epi16(__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B)
|
||||
_mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U,
|
||||
(__v8hi) _mm128_popcnt_epi16(__B),
|
||||
(__v8hi) _mm_popcnt_epi16(__B),
|
||||
(__v8hi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
|
||||
_mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
|
||||
{
|
||||
return _mm128_mask_popcnt_epi16((__m128i) _mm_setzero_si128(),
|
||||
return _mm_mask_popcnt_epi16((__m128i) _mm_setzero_si128(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
@ -98,29 +98,29 @@ _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_popcnt_epi8(__m128i __A)
|
||||
_mm_popcnt_epi8(__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B)
|
||||
_mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U,
|
||||
(__v16qi) _mm128_popcnt_epi8(__B),
|
||||
(__v16qi) _mm_popcnt_epi8(__B),
|
||||
(__v16qi) __A);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B)
|
||||
_mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B)
|
||||
{
|
||||
return _mm128_mask_popcnt_epi8((__m128i) _mm_setzero_si128(),
|
||||
return _mm_mask_popcnt_epi8((__m128i) _mm_setzero_si128(),
|
||||
__U,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B)
|
||||
_mm256_mask_bitshuffle_epi64_mask(__mmask32 __U, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
@ -128,15 +128,15 @@ _mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B)
|
||||
}
|
||||
|
||||
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
|
||||
_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B)
|
||||
_mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B)
|
||||
{
|
||||
return _mm256_mask_bitshuffle_epi32_mask((__mmask32) -1,
|
||||
return _mm256_mask_bitshuffle_epi64_mask((__mmask32) -1,
|
||||
__A,
|
||||
__B);
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B)
|
||||
_mm_mask_bitshuffle_epi64_mask(__mmask16 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
@ -144,9 +144,9 @@ _mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
|
||||
_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B)
|
||||
_mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B)
|
||||
{
|
||||
return _mm128_mask_bitshuffle_epi16_mask((__mmask16) -1,
|
||||
return _mm_mask_bitshuffle_epi64_mask((__mmask16) -1,
|
||||
__A,
|
||||
__B);
|
||||
}
|
||||
|
@ -31,13 +31,8 @@
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2")))
|
||||
|
||||
static __inline __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_setzero_hi(void) {
|
||||
return (__m128i)(__v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D)
|
||||
_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D,
|
||||
(__v8hi) __S,
|
||||
@ -45,15 +40,15 @@ _mm128_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_compress_epi16(__mmask8 __U, __m128i __D)
|
||||
_mm_maskz_compress_epi16(__mmask8 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D,
|
||||
(__v8hi) _mm128_setzero_hi(),
|
||||
(__v8hi) _mm_setzero_si128(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D)
|
||||
_mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D,
|
||||
(__v16qi) __S,
|
||||
@ -61,29 +56,29 @@ _mm128_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_compress_epi8(__mmask16 __U, __m128i __D)
|
||||
_mm_maskz_compress_epi8(__mmask16 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D,
|
||||
(__v16qi) _mm128_setzero_hi(),
|
||||
(__v16qi) _mm_setzero_si128(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D)
|
||||
_mm_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D)
|
||||
{
|
||||
__builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D)
|
||||
_mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D)
|
||||
{
|
||||
__builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D,
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D)
|
||||
_mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D,
|
||||
(__v8hi) __S,
|
||||
@ -91,15 +86,15 @@ _mm128_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_expand_epi16(__mmask8 __U, __m128i __D)
|
||||
_mm_maskz_expand_epi16(__mmask8 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D,
|
||||
(__v8hi) _mm128_setzero_hi(),
|
||||
(__v8hi) _mm_setzero_si128(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D)
|
||||
_mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D,
|
||||
(__v16qi) __S,
|
||||
@ -107,15 +102,15 @@ _mm128_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_expand_epi8(__mmask16 __U, __m128i __D)
|
||||
_mm_maskz_expand_epi8(__mmask16 __U, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D,
|
||||
(__v16qi) _mm128_setzero_hi(),
|
||||
(__v16qi) _mm_setzero_si128(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P)
|
||||
_mm_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
|
||||
(__v8hi) __S,
|
||||
@ -123,15 +118,15 @@ _mm128_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_expandloadu_epi16(__mmask8 __U, void const *__P)
|
||||
_mm_maskz_expandloadu_epi16(__mmask8 __U, void const *__P)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
|
||||
(__v8hi) _mm128_setzero_hi(),
|
||||
(__v8hi) _mm_setzero_si128(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P)
|
||||
_mm_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
|
||||
(__v16qi) __S,
|
||||
@ -139,19 +134,13 @@ _mm128_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_expandloadu_epi8(__mmask16 __U, void const *__P)
|
||||
_mm_maskz_expandloadu_epi8(__mmask16 __U, void const *__P)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
|
||||
(__v16qi) _mm128_setzero_hi(),
|
||||
(__v16qi) _mm_setzero_si128(),
|
||||
__U);
|
||||
}
|
||||
|
||||
static __inline __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_setzero_hi(void) {
|
||||
return (__m256i)(__v16hi){ 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
}
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D)
|
||||
{
|
||||
@ -164,7 +153,7 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D,
|
||||
(__v16hi) _mm256_setzero_hi(),
|
||||
(__v16hi) _mm256_setzero_si256(),
|
||||
__U);
|
||||
}
|
||||
|
||||
@ -180,7 +169,7 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D,
|
||||
(__v32qi) _mm256_setzero_hi(),
|
||||
(__v32qi) _mm256_setzero_si256(),
|
||||
__U);
|
||||
}
|
||||
|
||||
@ -210,7 +199,7 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D,
|
||||
(__v16hi) _mm256_setzero_hi(),
|
||||
(__v16hi) _mm256_setzero_si256(),
|
||||
__U);
|
||||
}
|
||||
|
||||
@ -226,7 +215,7 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D,
|
||||
(__v32qi) _mm256_setzero_hi(),
|
||||
(__v32qi) _mm256_setzero_si256(),
|
||||
__U);
|
||||
}
|
||||
|
||||
@ -242,7 +231,7 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P,
|
||||
(__v16hi) _mm256_setzero_hi(),
|
||||
(__v16hi) _mm256_setzero_si256(),
|
||||
__U);
|
||||
}
|
||||
|
||||
@ -258,7 +247,7 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P,
|
||||
(__v32qi) _mm256_setzero_hi(),
|
||||
(__v32qi) _mm256_setzero_si256(),
|
||||
__U);
|
||||
}
|
||||
|
||||
@ -270,23 +259,23 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm256_maskz_shldi_epi64(U, A, B, I) \
|
||||
_mm256_mask_shldi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I))
|
||||
_mm256_mask_shldi_epi64(_mm256_setzero_si256(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm256_shldi_epi64(A, B, I) \
|
||||
_mm256_mask_shldi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm128_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \
|
||||
#define _mm_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(A), \
|
||||
(__v2di)(B), \
|
||||
(int)(I), \
|
||||
(__v2di)(S), \
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm128_maskz_shldi_epi64(U, A, B, I) \
|
||||
_mm128_mask_shldi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I))
|
||||
#define _mm_maskz_shldi_epi64(U, A, B, I) \
|
||||
_mm_mask_shldi_epi64(_mm_setzero_si128(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm128_shldi_epi64(A, B, I) \
|
||||
_mm128_mask_shldi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
|
||||
#define _mm_shldi_epi64(A, B, I) \
|
||||
_mm_mask_shldi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm256_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(A), \
|
||||
@ -296,23 +285,23 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm256_maskz_shldi_epi32(U, A, B, I) \
|
||||
_mm256_mask_shldi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I))
|
||||
_mm256_mask_shldi_epi32(_mm256_setzero_si256(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm256_shldi_epi32(A, B, I) \
|
||||
_mm256_mask_shldi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm128_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \
|
||||
#define _mm_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(A), \
|
||||
(__v4si)(B), \
|
||||
(int)(I), \
|
||||
(__v4si)(S), \
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm128_maskz_shldi_epi32(U, A, B, I) \
|
||||
_mm128_mask_shldi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I))
|
||||
#define _mm_maskz_shldi_epi32(U, A, B, I) \
|
||||
_mm_mask_shldi_epi32(_mm_setzero_si128(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm128_shldi_epi32(A, B, I) \
|
||||
_mm128_mask_shldi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
|
||||
#define _mm_shldi_epi32(A, B, I) \
|
||||
_mm_mask_shldi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm256_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(A), \
|
||||
@ -322,23 +311,23 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
|
||||
(__mmask16)(U)); })
|
||||
|
||||
#define _mm256_maskz_shldi_epi16(U, A, B, I) \
|
||||
_mm256_mask_shldi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I))
|
||||
_mm256_mask_shldi_epi16(_mm256_setzero_si256(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm256_shldi_epi16(A, B, I) \
|
||||
_mm256_mask_shldi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm128_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \
|
||||
#define _mm_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(A), \
|
||||
(__v8hi)(B), \
|
||||
(int)(I), \
|
||||
(__v8hi)(S), \
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm128_maskz_shldi_epi16(U, A, B, I) \
|
||||
_mm128_mask_shldi_epi16(_mm128_setzero_hi(), (U), (A), (B), (I))
|
||||
#define _mm_maskz_shldi_epi16(U, A, B, I) \
|
||||
_mm_mask_shldi_epi16(_mm_setzero_si128(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm128_shldi_epi16(A, B, I) \
|
||||
_mm128_mask_shldi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
|
||||
#define _mm_shldi_epi16(A, B, I) \
|
||||
_mm_mask_shldi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm256_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(A), \
|
||||
@ -348,23 +337,23 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm256_maskz_shrdi_epi64(U, A, B, I) \
|
||||
_mm256_mask_shrdi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I))
|
||||
_mm256_mask_shrdi_epi64(_mm256_setzero_si256(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm256_shrdi_epi64(A, B, I) \
|
||||
_mm256_mask_shrdi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm128_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \
|
||||
#define _mm_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(A), \
|
||||
(__v2di)(B), \
|
||||
(int)(I), \
|
||||
(__v2di)(S), \
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm128_maskz_shrdi_epi64(U, A, B, I) \
|
||||
_mm128_mask_shrdi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I))
|
||||
#define _mm_maskz_shrdi_epi64(U, A, B, I) \
|
||||
_mm_mask_shrdi_epi64(_mm_setzero_si128(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm128_shrdi_epi64(A, B, I) \
|
||||
_mm128_mask_shrdi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
|
||||
#define _mm_shrdi_epi64(A, B, I) \
|
||||
_mm_mask_shrdi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm256_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(A), \
|
||||
@ -374,23 +363,23 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm256_maskz_shrdi_epi32(U, A, B, I) \
|
||||
_mm256_mask_shrdi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I))
|
||||
_mm256_mask_shrdi_epi32(_mm256_setzero_si256(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm256_shrdi_epi32(A, B, I) \
|
||||
_mm256_mask_shrdi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm128_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \
|
||||
#define _mm_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(A), \
|
||||
(__v4si)(B), \
|
||||
(int)(I), \
|
||||
(__v4si)(S), \
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm128_maskz_shrdi_epi32(U, A, B, I) \
|
||||
_mm128_mask_shrdi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I))
|
||||
#define _mm_maskz_shrdi_epi32(U, A, B, I) \
|
||||
_mm_mask_shrdi_epi32(_mm_setzero_si128(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm128_shrdi_epi32(A, B, I) \
|
||||
_mm128_mask_shrdi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
|
||||
#define _mm_shrdi_epi32(A, B, I) \
|
||||
_mm_mask_shrdi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm256_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \
|
||||
(__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(A), \
|
||||
@ -400,23 +389,23 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
|
||||
(__mmask16)(U)); })
|
||||
|
||||
#define _mm256_maskz_shrdi_epi16(U, A, B, I) \
|
||||
_mm256_mask_shrdi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I))
|
||||
_mm256_mask_shrdi_epi16(_mm256_setzero_si256(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm256_shrdi_epi16(A, B, I) \
|
||||
_mm256_mask_shrdi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
#define _mm128_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \
|
||||
#define _mm_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \
|
||||
(__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(A), \
|
||||
(__v8hi)(B), \
|
||||
(int)(I), \
|
||||
(__v8hi)(S), \
|
||||
(__mmask8)(U)); })
|
||||
|
||||
#define _mm128_maskz_shrdi_epi16(U, A, B, I) \
|
||||
_mm128_mask_shrdi_epi16(_mm128_setzero_hi(), (U), (A), (B), (I))
|
||||
#define _mm_maskz_shrdi_epi16(U, A, B, I) \
|
||||
_mm_mask_shrdi_epi16(_mm_setzero_si128(), (U), (A), (B), (I))
|
||||
|
||||
#define _mm128_shrdi_epi16(A, B, I) \
|
||||
_mm128_mask_shrdi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
|
||||
#define _mm_shrdi_epi16(A, B, I) \
|
||||
_mm_mask_shrdi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
|
||||
@ -446,7 +435,7 @@ _mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
_mm_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
|
||||
(__v2di) __A,
|
||||
@ -455,7 +444,7 @@ _mm128_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S,
|
||||
(__v2di) __A,
|
||||
@ -464,7 +453,7 @@ _mm128_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_shldv_epi64(__m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_shldv_epi64(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
|
||||
(__v2di) __A,
|
||||
@ -500,7 +489,7 @@ _mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
_mm_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
@ -509,7 +498,7 @@ _mm128_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
@ -518,7 +507,7 @@ _mm128_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_shldv_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_shldv_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
@ -554,7 +543,7 @@ _mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
_mm_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
|
||||
(__v8hi) __A,
|
||||
@ -563,7 +552,7 @@ _mm128_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S,
|
||||
(__v8hi) __A,
|
||||
@ -572,7 +561,7 @@ _mm128_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_shldv_epi16(__m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_shldv_epi16(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
|
||||
(__v8hi) __A,
|
||||
@ -608,7 +597,7 @@ _mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
_mm_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
|
||||
(__v2di) __A,
|
||||
@ -617,7 +606,7 @@ _mm128_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S,
|
||||
(__v2di) __A,
|
||||
@ -626,7 +615,7 @@ _mm128_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
|
||||
(__v2di) __A,
|
||||
@ -662,7 +651,7 @@ _mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
_mm_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
@ -671,7 +660,7 @@ _mm128_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
@ -680,7 +669,7 @@ _mm128_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
@ -716,7 +705,7 @@ _mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
_mm_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
|
||||
(__v8hi) __A,
|
||||
@ -725,7 +714,7 @@ _mm128_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S,
|
||||
(__v8hi) __A,
|
||||
@ -734,7 +723,7 @@ _mm128_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
|
||||
(__v8hi) __A,
|
||||
|
@ -141,7 +141,7 @@ _mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
@ -150,7 +150,7 @@ _mm128_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpbusd128_maskz ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
@ -159,7 +159,7 @@ _mm128_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
@ -168,7 +168,7 @@ _mm128_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
@ -177,7 +177,7 @@ _mm128_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpbusds128_maskz ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
@ -186,7 +186,7 @@ _mm128_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
@ -195,7 +195,7 @@ _mm128_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
@ -204,7 +204,7 @@ _mm128_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpwssd128_maskz ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
@ -213,7 +213,7 @@ _mm128_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
@ -222,7 +222,7 @@ _mm128_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
@ -231,7 +231,7 @@ _mm128_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpwssds128_maskz ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
@ -240,7 +240,7 @@ _mm128_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
|
||||
}
|
||||
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS
|
||||
_mm128_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S,
|
||||
(__v4si) __A,
|
||||
|
@ -10947,6 +10947,8 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) {
|
||||
if (var->isThisDeclarationADefinition() &&
|
||||
var->getDeclContext()->getRedeclContext()->isFileContext() &&
|
||||
var->isExternallyVisible() && var->hasLinkage() &&
|
||||
!var->isInline() && !var->getDescribedVarTemplate() &&
|
||||
!isTemplateInstantiation(var->getTemplateSpecializationKind()) &&
|
||||
!getDiagnostics().isIgnored(diag::warn_missing_variable_declarations,
|
||||
var->getLocation())) {
|
||||
// Find a previous declaration that's not a definition.
|
||||
|
@ -212,20 +212,21 @@ static void insertTargetAndModeArgs(const ParsedClangName &NameParts,
|
||||
// Put target and mode arguments at the start of argument list so that
|
||||
// arguments specified in command line could override them. Avoid putting
|
||||
// them at index 0, as an option like '-cc1' must remain the first.
|
||||
auto InsertionPoint = ArgVector.begin();
|
||||
if (InsertionPoint != ArgVector.end())
|
||||
int InsertionPoint = 0;
|
||||
if (ArgVector.size() > 0)
|
||||
++InsertionPoint;
|
||||
|
||||
if (NameParts.DriverMode) {
|
||||
// Add the mode flag to the arguments.
|
||||
ArgVector.insert(InsertionPoint,
|
||||
ArgVector.insert(ArgVector.begin() + InsertionPoint,
|
||||
GetStableCStr(SavedStrings, NameParts.DriverMode));
|
||||
}
|
||||
|
||||
if (NameParts.TargetIsValid) {
|
||||
const char *arr[] = {"-target", GetStableCStr(SavedStrings,
|
||||
NameParts.TargetPrefix)};
|
||||
ArgVector.insert(InsertionPoint, std::begin(arr), std::end(arr));
|
||||
ArgVector.insert(ArgVector.begin() + InsertionPoint,
|
||||
std::begin(arr), std::end(arr));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -175,6 +175,7 @@ struct Configuration {
|
||||
bool AppContainer = false;
|
||||
bool MinGW = false;
|
||||
bool WarnLocallyDefinedImported = true;
|
||||
bool KillAt = false;
|
||||
};
|
||||
|
||||
extern Configuration *Config;
|
||||
|
@ -970,6 +970,10 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
|
||||
if (Args.hasArg(OPT_lldsavetemps))
|
||||
Config->SaveTemps = true;
|
||||
|
||||
// Handle /kill-at
|
||||
if (Args.hasArg(OPT_kill_at))
|
||||
Config->KillAt = true;
|
||||
|
||||
// Handle /lldltocache
|
||||
if (auto *Arg = Args.getLastArg(OPT_lldltocache))
|
||||
Config->LTOCache = Arg->getValue();
|
||||
|
@ -561,6 +561,26 @@ static StringRef undecorate(StringRef Sym) {
|
||||
return Sym.startswith("_") ? Sym.substr(1) : Sym;
|
||||
}
|
||||
|
||||
// Convert stdcall/fastcall style symbols into unsuffixed symbols,
|
||||
// with or without a leading underscore. (MinGW specific.)
|
||||
static StringRef killAt(StringRef Sym, bool Prefix) {
|
||||
if (Sym.empty())
|
||||
return Sym;
|
||||
// Strip any trailing stdcall suffix
|
||||
Sym = Sym.substr(0, Sym.find('@', 1));
|
||||
if (!Sym.startswith("@")) {
|
||||
if (Prefix && !Sym.startswith("_"))
|
||||
return Saver.save("_" + Sym);
|
||||
return Sym;
|
||||
}
|
||||
// For fastcall, remove the leading @ and replace it with an
|
||||
// underscore, if prefixes are used.
|
||||
Sym = Sym.substr(1);
|
||||
if (Prefix)
|
||||
Sym = Saver.save("_" + Sym);
|
||||
return Sym;
|
||||
}
|
||||
|
||||
// Performs error checking on all /export arguments.
|
||||
// It also sets ordinals.
|
||||
void fixupExports() {
|
||||
@ -593,6 +613,15 @@ void fixupExports() {
|
||||
}
|
||||
}
|
||||
|
||||
if (Config->KillAt && Config->Machine == I386) {
|
||||
for (Export &E : Config->Exports) {
|
||||
E.Name = killAt(E.Name, true);
|
||||
E.ExportName = killAt(E.ExportName, false);
|
||||
E.ExtName = killAt(E.ExtName, true);
|
||||
E.SymbolName = killAt(E.SymbolName, true);
|
||||
}
|
||||
}
|
||||
|
||||
// Uniquefy by name.
|
||||
DenseMap<StringRef, Export *> Map(Config->Exports.size());
|
||||
std::vector<Export> V;
|
||||
|
@ -121,6 +121,7 @@ def help_q : Flag<["/?", "-?"], "">, Alias<help>;
|
||||
def debug_ghash : F<"debug:ghash">;
|
||||
def debug_dwarf : F<"debug:dwarf">;
|
||||
def export_all_symbols : F<"export-all-symbols">;
|
||||
def kill_at : F<"kill-at">;
|
||||
def lldmingw : F<"lldmingw">;
|
||||
def msvclto : F<"msvclto">;
|
||||
def output_def : Joined<["/", "-"], "output-def:">;
|
||||
|
@ -296,7 +296,8 @@ template <class ELFT> void MIPS<ELFT>::writePltHeader(uint8_t *Buf) const {
|
||||
write32<E>(Buf + 20, 0x0018c082); // srl $24, $24, 2
|
||||
}
|
||||
|
||||
write32<E>(Buf + 24, 0x0320f809); // jalr $25
|
||||
uint32_t JalrInst = Config->ZHazardplt ? 0x0320fc09 : 0x0320f809;
|
||||
write32<E>(Buf + 24, JalrInst); // jalr.hb $25 or jalr $25
|
||||
write32<E>(Buf + 28, 0x2718fffe); // subu $24, $24, 2
|
||||
|
||||
uint64_t GotPlt = InX::GotPlt->getVA();
|
||||
@ -330,9 +331,12 @@ void MIPS<ELFT>::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t JrInst = isMipsR6() ? (Config->ZHazardplt ? 0x03200409 : 0x03200009)
|
||||
: (Config->ZHazardplt ? 0x03200408 : 0x03200008);
|
||||
|
||||
write32<E>(Buf, 0x3c0f0000); // lui $15, %hi(.got.plt entry)
|
||||
write32<E>(Buf + 4, 0x8df90000); // l[wd] $25, %lo(.got.plt entry)($15)
|
||||
write32<E>(Buf + 8, isMipsR6() ? 0x03200009 : 0x03200008); // jr $25
|
||||
write32<E>(Buf + 8, JrInst); // jr $25 / jr.hb $25
|
||||
write32<E>(Buf + 12, 0x25f80000); // addiu $24, $15, %lo(.got.plt entry)
|
||||
writeRelocation<E>(Buf, GotPltEntryAddr + 0x8000, 16, 16);
|
||||
writeRelocation<E>(Buf + 4, GotPltEntryAddr, 16, 0);
|
||||
|
@ -151,6 +151,7 @@ struct Configuration {
|
||||
bool WarnMissingEntry;
|
||||
bool ZCombreloc;
|
||||
bool ZExecstack;
|
||||
bool ZHazardplt;
|
||||
bool ZNocopyreloc;
|
||||
bool ZNodelete;
|
||||
bool ZNodlopen;
|
||||
|
@ -668,6 +668,7 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
|
||||
Config->WarnCommon = Args.hasArg(OPT_warn_common);
|
||||
Config->ZCombreloc = !hasZOption(Args, "nocombreloc");
|
||||
Config->ZExecstack = hasZOption(Args, "execstack");
|
||||
Config->ZHazardplt = hasZOption(Args, "hazardplt");
|
||||
Config->ZNocopyreloc = hasZOption(Args, "nocopyreloc");
|
||||
Config->ZNodelete = hasZOption(Args, "nodelete");
|
||||
Config->ZNodlopen = hasZOption(Args, "nodlopen");
|
||||
|
@ -1,3 +1,3 @@
|
||||
/* $FreeBSD$ */
|
||||
|
||||
#define FREEBSD_CC_VERSION 1200014
|
||||
#define FREEBSD_CC_VERSION 1200015
|
||||
|
@ -8,4 +8,4 @@
|
||||
|
||||
#define CLANG_VENDOR "FreeBSD "
|
||||
|
||||
#define SVN_REVISION "326565"
|
||||
#define SVN_REVISION "335540"
|
||||
|
@ -7,4 +7,4 @@
|
||||
|
||||
#define LLD_REPOSITORY_STRING "FreeBSD"
|
||||
// <Upstream revision at import>-<Local identifier in __FreeBSD_version style>
|
||||
#define LLD_REVISION_STRING "326565-1200002"
|
||||
#define LLD_REVISION_STRING "335540-1200003"
|
||||
|
@ -1,2 +1,2 @@
|
||||
/* $FreeBSD$ */
|
||||
#define LLVM_REVISION "svn-r326565"
|
||||
#define LLVM_REVISION "svn-r335540"
|
||||
|
Loading…
x
Reference in New Issue
Block a user