Revert r332501 for now, as it can cause build failures on i386.
Reported upstream as <https://bugs.llvm.org/show_bug.cgi?id=37133>. Reported by: emaste, ci.freebsd.org PR: 225330
This commit is contained in:
parent
0ae629bdd6
commit
6ec30ab86a
@ -449,13 +449,6 @@ class MachineBasicBlock
|
||||
/// Replace successor OLD with NEW and update probability info.
|
||||
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New);
|
||||
|
||||
/// Copy a successor (and any probability info) from original block to this
|
||||
/// block's. Uses an iterator into the original block's successors.
|
||||
///
|
||||
/// This is useful when doing a partial clone of successors. Afterward, the
|
||||
/// probabilities may need to be normalized.
|
||||
void copySuccessor(MachineBasicBlock *Orig, succ_iterator I);
|
||||
|
||||
/// Transfers all the successors from MBB to this machine basic block (i.e.,
|
||||
/// copies all the successors of FromMBB and removes all the successors from
|
||||
/// FromMBB).
|
||||
|
@ -646,14 +646,6 @@ void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
|
||||
removeSuccessor(OldI);
|
||||
}
|
||||
|
||||
/// Copy the successor referenced by \p I onto this block, together with its
/// probability when \p Orig has one recorded.
///
/// \param Orig  Block whose successor list \p I iterates over.
/// \param I     Iterator designating the successor of \p Orig to copy.
void MachineBasicBlock::copySuccessor(MachineBasicBlock *Orig,
                                      succ_iterator I) {
  // Only ask Orig for an edge probability when it actually stores
  // probabilities. The previous condition was inverted: it queried a
  // probability exactly when none was recorded and discarded it otherwise.
  if (!Orig->Probs.empty())
    addSuccessor(*I, Orig->getSuccProbability(I));
  else
    addSuccessorWithoutProb(*I);
}
|
||||
|
||||
/// Append \p Pred to the end of this block's predecessor list.
void MachineBasicBlock::addPredecessor(MachineBasicBlock *Pred) {
  Predecessors.emplace_back(Pred);
}
|
||||
|
@ -265,10 +265,13 @@ MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
|
||||
/// @param reg - The Reg to append.
|
||||
static void translateRegister(MCInst &mcInst, Reg reg) {
  // Lookup table from the decoder's Reg value to the LLVM register number.
  // ALL_REGS is presumably a list of ENTRY(x) items (its definition is not
  // visible here), each of which becomes one X86::x element of the table.
  //
  // The table is declared exactly once, as a constant of type MCPhysReg, so
  // register numbers are not narrowed to 8 bits and the array is not rebuilt
  // on every call.
#define ENTRY(x) X86::x,
  static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS};
#undef ENTRY

  MCPhysReg llvmRegnum = llvmRegnums[reg];
  mcInst.addOperand(MCOperand::createReg(llvmRegnum));
}
|
||||
|
||||
|
@ -66,9 +66,6 @@ FunctionPass *createX86OptimizeLEAs();
|
||||
/// Return a pass that transforms setcc + movzx pairs into xor + setcc.
|
||||
FunctionPass *createX86FixupSetCC();
|
||||
|
||||
/// Return a pass that lowers EFLAGS copy pseudo instructions.
|
||||
FunctionPass *createX86FlagsCopyLoweringPass();
|
||||
|
||||
/// Return a pass that expands WinAlloca pseudo-instructions.
|
||||
FunctionPass *createX86WinAllocaExpander();
|
||||
|
||||
|
@ -1,734 +0,0 @@
|
||||
//====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
/// \file
|
||||
///
|
||||
/// Lowers COPY nodes of EFLAGS by directly extracting and preserving individual
|
||||
/// flag bits.
|
||||
///
|
||||
/// We have to do this by carefully analyzing and rewriting the usage of the
|
||||
/// copied EFLAGS register because there is no general way to rematerialize the
|
||||
/// entire EFLAGS register safely and efficiently. Using `popf` both forces
|
||||
/// dynamic stack adjustment and can create correctness issues due to IF, TF,
|
||||
/// and other non-status flags being overwritten. Sequences involving
|
||||
/// SAHF don't work on all x86 processors and are often quite slow compared to
|
||||
/// directly testing a single status preserved in its own GPR.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "X86.h"
|
||||
#include "X86InstrBuilder.h"
|
||||
#include "X86InstrInfo.h"
|
||||
#include "X86Subtarget.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/ScopeExit.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/SparseBitVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineConstantPool.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||
#include "llvm/CodeGen/MachineOperand.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/MachineSSAUpdater.h"
|
||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||
#include "llvm/CodeGen/TargetRegisterInfo.h"
|
||||
#include "llvm/CodeGen/TargetSchedule.h"
|
||||
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
||||
#include "llvm/IR/DebugLoc.h"
|
||||
#include "llvm/MC/MCSchedule.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <iterator>
|
||||
#include <utility>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define PASS_KEY "x86-flags-copy-lowering"
|
||||
#define DEBUG_TYPE PASS_KEY
|
||||
|
||||
STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
|
||||
STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
|
||||
STATISTIC(NumTestsInserted, "Number of test instructions inserted");
|
||||
STATISTIC(NumAddsInserted, "Number of adds instructions inserted");
|
||||
|
||||
namespace llvm {
|
||||
|
||||
void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
namespace {
|
||||
|
||||
// Convenient array type for storing registers associated with each condition.
|
||||
using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;
|
||||
|
||||
class X86FlagsCopyLoweringPass : public MachineFunctionPass {
|
||||
public:
|
||||
X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) {
|
||||
initializeX86FlagsCopyLoweringPassPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
||||
|
||||
/// Pass identification, replacement for typeid.
|
||||
static char ID;
|
||||
|
||||
private:
|
||||
MachineRegisterInfo *MRI;
|
||||
const X86InstrInfo *TII;
|
||||
const TargetRegisterInfo *TRI;
|
||||
const TargetRegisterClass *PromoteRC;
|
||||
|
||||
CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
|
||||
MachineInstr &CopyDefI);
|
||||
|
||||
unsigned promoteCondToReg(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator TestPos,
|
||||
DebugLoc TestLoc, X86::CondCode Cond);
|
||||
std::pair<unsigned, bool>
|
||||
getCondOrInverseInReg(MachineBasicBlock &TestMBB,
|
||||
MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
|
||||
X86::CondCode Cond, CondRegArray &CondRegs);
|
||||
void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
|
||||
DebugLoc Loc, unsigned Reg);
|
||||
|
||||
void rewriteArithmetic(MachineBasicBlock &TestMBB,
|
||||
MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
|
||||
MachineInstr &MI, MachineOperand &FlagUse,
|
||||
CondRegArray &CondRegs);
|
||||
void rewriteCMov(MachineBasicBlock &TestMBB,
|
||||
MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
|
||||
MachineInstr &CMovI, MachineOperand &FlagUse,
|
||||
CondRegArray &CondRegs);
|
||||
void rewriteCondJmp(MachineBasicBlock &TestMBB,
|
||||
MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
|
||||
MachineInstr &JmpI, CondRegArray &CondRegs);
|
||||
void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
|
||||
MachineInstr &CopyDefI);
|
||||
void rewriteSetCC(MachineBasicBlock &TestMBB,
|
||||
MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
|
||||
MachineInstr &SetCCI, MachineOperand &FlagUse,
|
||||
CondRegArray &CondRegs);
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE,
|
||||
"X86 EFLAGS copy lowering", false, false)
|
||||
INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE,
|
||||
"X86 EFLAGS copy lowering", false, false)
|
||||
|
||||
/// Factory for the EFLAGS copy lowering pass; the caller receives a newly
/// allocated pass object.
FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
  FunctionPass *LoweringPass = new X86FlagsCopyLoweringPass();
  return LoweringPass;
}
|
||||
|
||||
char X86FlagsCopyLoweringPass::ID = 0;
|
||||
|
||||
/// Declare this pass's analysis usage. Nothing pass-specific is added here;
/// the request is forwarded unchanged to MachineFunctionPass.
void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
namespace {
|
||||
/// An enumeration of the arithmetic instruction mnemonics which have
|
||||
/// interesting flag semantics.
|
||||
///
|
||||
/// We can map instruction opcodes into these mnemonics to make it easy to
|
||||
/// dispatch with specific functionality.
|
||||
enum class FlagArithMnemonic {
|
||||
ADC,
|
||||
ADCX,
|
||||
ADOX,
|
||||
RCL,
|
||||
RCR,
|
||||
SBB,
|
||||
};
|
||||
} // namespace
|
||||
|
||||
static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) {
|
||||
switch (Opcode) {
|
||||
default:
|
||||
report_fatal_error("No support for lowering a copy into EFLAGS when used "
|
||||
"by this instruction!");
|
||||
|
||||
#define LLVM_EXPAND_INSTR_SIZES(MNEMONIC, SUFFIX) \
|
||||
case X86::MNEMONIC##8##SUFFIX: \
|
||||
case X86::MNEMONIC##16##SUFFIX: \
|
||||
case X86::MNEMONIC##32##SUFFIX: \
|
||||
case X86::MNEMONIC##64##SUFFIX:
|
||||
|
||||
#define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC) \
|
||||
LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr) \
|
||||
LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV) \
|
||||
LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm) \
|
||||
LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr) \
|
||||
case X86::MNEMONIC##8ri: \
|
||||
case X86::MNEMONIC##16ri8: \
|
||||
case X86::MNEMONIC##32ri8: \
|
||||
case X86::MNEMONIC##64ri8: \
|
||||
case X86::MNEMONIC##16ri: \
|
||||
case X86::MNEMONIC##32ri: \
|
||||
case X86::MNEMONIC##64ri32: \
|
||||
case X86::MNEMONIC##8mi: \
|
||||
case X86::MNEMONIC##16mi8: \
|
||||
case X86::MNEMONIC##32mi8: \
|
||||
case X86::MNEMONIC##64mi8: \
|
||||
case X86::MNEMONIC##16mi: \
|
||||
case X86::MNEMONIC##32mi: \
|
||||
case X86::MNEMONIC##64mi32: \
|
||||
case X86::MNEMONIC##8i8: \
|
||||
case X86::MNEMONIC##16i16: \
|
||||
case X86::MNEMONIC##32i32: \
|
||||
case X86::MNEMONIC##64i32:
|
||||
|
||||
LLVM_EXPAND_ADC_SBB_INSTR(ADC)
|
||||
return FlagArithMnemonic::ADC;
|
||||
|
||||
LLVM_EXPAND_ADC_SBB_INSTR(SBB)
|
||||
return FlagArithMnemonic::SBB;
|
||||
|
||||
#undef LLVM_EXPAND_ADC_SBB_INSTR
|
||||
|
||||
LLVM_EXPAND_INSTR_SIZES(RCL, rCL)
|
||||
LLVM_EXPAND_INSTR_SIZES(RCL, r1)
|
||||
LLVM_EXPAND_INSTR_SIZES(RCL, ri)
|
||||
return FlagArithMnemonic::RCL;
|
||||
|
||||
LLVM_EXPAND_INSTR_SIZES(RCR, rCL)
|
||||
LLVM_EXPAND_INSTR_SIZES(RCR, r1)
|
||||
LLVM_EXPAND_INSTR_SIZES(RCR, ri)
|
||||
return FlagArithMnemonic::RCR;
|
||||
|
||||
#undef LLVM_EXPAND_INSTR_SIZES
|
||||
|
||||
case X86::ADCX32rr:
|
||||
case X86::ADCX64rr:
|
||||
case X86::ADCX32rm:
|
||||
case X86::ADCX64rm:
|
||||
return FlagArithMnemonic::ADCX;
|
||||
|
||||
case X86::ADOX32rr:
|
||||
case X86::ADOX64rr:
|
||||
case X86::ADOX32rm:
|
||||
case X86::ADOX64rm:
|
||||
return FlagArithMnemonic::ADOX;
|
||||
}
|
||||
}
|
||||
|
||||
static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
|
||||
MachineInstr &SplitI,
|
||||
const X86InstrInfo &TII) {
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
|
||||
assert(SplitI.getParent() == &MBB &&
|
||||
"Split instruction must be in the split block!");
|
||||
assert(SplitI.isBranch() &&
|
||||
"Only designed to split a tail of branch instructions!");
|
||||
assert(X86::getCondFromBranchOpc(SplitI.getOpcode()) != X86::COND_INVALID &&
|
||||
"Must split on an actual jCC instruction!");
|
||||
|
||||
// Dig out the previous instruction to the split point.
|
||||
MachineInstr &PrevI = *std::prev(SplitI.getIterator());
|
||||
assert(PrevI.isBranch() && "Must split after a branch!");
|
||||
assert(X86::getCondFromBranchOpc(PrevI.getOpcode()) != X86::COND_INVALID &&
|
||||
"Must split after an actual jCC instruction!");
|
||||
assert(!std::prev(PrevI.getIterator())->isTerminator() &&
|
||||
"Must only have this one terminator prior to the split!");
|
||||
|
||||
// Grab the one successor edge that will stay in `MBB`.
|
||||
MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB();
|
||||
|
||||
// Analyze the original block to see if we are actually splitting an edge
|
||||
// into two edges. This can happen when we have multiple conditional jumps to
|
||||
// the same successor.
|
||||
bool IsEdgeSplit =
|
||||
std::any_of(SplitI.getIterator(), MBB.instr_end(),
|
||||
[&](MachineInstr &MI) {
|
||||
assert(MI.isTerminator() &&
|
||||
"Should only have spliced terminators!");
|
||||
return llvm::any_of(
|
||||
MI.operands(), [&](MachineOperand &MOp) {
|
||||
return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
|
||||
});
|
||||
}) ||
|
||||
MBB.getFallThrough() == &UnsplitSucc;
|
||||
|
||||
MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();
|
||||
|
||||
// Insert the new block immediately after the current one. Any existing
|
||||
// fallthrough will be sunk into this new block anyways.
|
||||
MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
|
||||
|
||||
// Splice the tail of instructions into the new block.
|
||||
NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end());
|
||||
|
||||
// Copy the necessary successors (and their probability info) into the new
|
||||
// block.
|
||||
for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
|
||||
if (IsEdgeSplit || *SI != &UnsplitSucc)
|
||||
NewMBB.copySuccessor(&MBB, SI);
|
||||
// Normalize the probabilities if we didn't end up splitting the edge.
|
||||
if (!IsEdgeSplit)
|
||||
NewMBB.normalizeSuccProbs();
|
||||
|
||||
// Now replace all of the moved successors in the original block with the new
|
||||
// block. This will merge their probabilities.
|
||||
for (MachineBasicBlock *Succ : NewMBB.successors())
|
||||
if (Succ != &UnsplitSucc)
|
||||
MBB.replaceSuccessor(Succ, &NewMBB);
|
||||
|
||||
// We should always end up replacing at least one successor.
|
||||
assert(MBB.isSuccessor(&NewMBB) &&
|
||||
"Failed to make the new block a successor!");
|
||||
|
||||
// Now update all the PHIs.
|
||||
for (MachineBasicBlock *Succ : NewMBB.successors()) {
|
||||
for (MachineInstr &MI : *Succ) {
|
||||
if (!MI.isPHI())
|
||||
break;
|
||||
|
||||
for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
|
||||
OpIdx += 2) {
|
||||
MachineOperand &OpV = MI.getOperand(OpIdx);
|
||||
MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
|
||||
assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
|
||||
if (OpMBB.getMBB() != &MBB)
|
||||
continue;
|
||||
|
||||
// Replace the operand for unsplit successors
|
||||
if (!IsEdgeSplit || Succ != &UnsplitSucc) {
|
||||
OpMBB.setMBB(&NewMBB);
|
||||
|
||||
// We have to continue scanning as there may be multiple entries in
|
||||
// the PHI.
|
||||
continue;
|
||||
}
|
||||
|
||||
// When we have split the edge append a new successor.
|
||||
MI.addOperand(MF, OpV);
|
||||
MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NewMBB;
|
||||
}
|
||||
|
||||
bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
|
||||
<< " **********\n");
|
||||
|
||||
auto &Subtarget = MF.getSubtarget<X86Subtarget>();
|
||||
MRI = &MF.getRegInfo();
|
||||
TII = Subtarget.getInstrInfo();
|
||||
TRI = Subtarget.getRegisterInfo();
|
||||
PromoteRC = &X86::GR8RegClass;
|
||||
|
||||
if (MF.begin() == MF.end())
|
||||
// Nothing to do for a degenerate empty function...
|
||||
return false;
|
||||
|
||||
SmallVector<MachineInstr *, 4> Copies;
|
||||
for (MachineBasicBlock &MBB : MF)
|
||||
for (MachineInstr &MI : MBB)
|
||||
if (MI.getOpcode() == TargetOpcode::COPY &&
|
||||
MI.getOperand(0).getReg() == X86::EFLAGS)
|
||||
Copies.push_back(&MI);
|
||||
|
||||
for (MachineInstr *CopyI : Copies) {
|
||||
MachineBasicBlock &MBB = *CopyI->getParent();
|
||||
|
||||
MachineOperand &VOp = CopyI->getOperand(1);
|
||||
assert(VOp.isReg() &&
|
||||
"The input to the copy for EFLAGS should always be a register!");
|
||||
MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg());
|
||||
if (CopyDefI.getOpcode() != TargetOpcode::COPY) {
|
||||
// FIXME: The big likely candidate here are PHI nodes. We could in theory
|
||||
// handle PHI nodes, but it gets really, really hard. Insanely hard. Hard
|
||||
// enough that it is probably better to change every other part of LLVM
|
||||
// to avoid creating them. The issue is that once we have PHIs we won't
|
||||
// know which original EFLAGS value we need to capture with our setCCs
|
||||
// below. The end result will be computing a complete set of setCCs that
|
||||
// we *might* want, computing them in every place where we copy *out* of
|
||||
// EFLAGS and then doing SSA formation on all of them to insert necessary
|
||||
// PHI nodes and consume those here. Then hoping that somehow we DCE the
|
||||
// unnecessary ones. This DCE seems very unlikely to be successful and so
|
||||
// we will almost certainly end up with a glut of dead setCC
|
||||
// instructions. Until we have a motivating test case and fail to avoid
|
||||
// it by changing other parts of LLVM's lowering, we refuse to handle
|
||||
// this complex case here.
|
||||
DEBUG(dbgs() << "ERROR: Encountered unexpected def of an eflags copy: ";
|
||||
CopyDefI.dump());
|
||||
report_fatal_error(
|
||||
"Cannot lower EFLAGS copy unless it is defined in turn by a copy!");
|
||||
}
|
||||
|
||||
auto Cleanup = make_scope_exit([&] {
|
||||
// All uses of the EFLAGS copy are now rewritten, kill the copy into
|
||||
// eflags and if dead the copy from.
|
||||
CopyI->eraseFromParent();
|
||||
if (MRI->use_empty(CopyDefI.getOperand(0).getReg()))
|
||||
CopyDefI.eraseFromParent();
|
||||
++NumCopiesEliminated;
|
||||
});
|
||||
|
||||
MachineOperand &DOp = CopyI->getOperand(0);
|
||||
assert(DOp.isDef() && "Expected register def!");
|
||||
assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!");
|
||||
if (DOp.isDead())
|
||||
continue;
|
||||
|
||||
MachineBasicBlock &TestMBB = *CopyDefI.getParent();
|
||||
auto TestPos = CopyDefI.getIterator();
|
||||
DebugLoc TestLoc = CopyDefI.getDebugLoc();
|
||||
|
||||
DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());
|
||||
|
||||
// Scan for usage of newly set EFLAGS so we can rewrite them. We just buffer
|
||||
// jumps because their usage is very constrained.
|
||||
bool FlagsKilled = false;
|
||||
SmallVector<MachineInstr *, 4> JmpIs;
|
||||
|
||||
// Gather the condition flags that have already been preserved in
|
||||
// registers. We do this from scratch each time as we expect there to be
|
||||
// very few of them and we expect to not revisit the same copy definition
|
||||
// many times. If either of those change sufficiently we could build a map
|
||||
// of these up front instead.
|
||||
CondRegArray CondRegs = collectCondsInRegs(TestMBB, CopyDefI);
|
||||
|
||||
for (auto MII = std::next(CopyI->getIterator()), MIE = MBB.instr_end();
|
||||
MII != MIE;) {
|
||||
MachineInstr &MI = *MII++;
|
||||
MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
|
||||
if (!FlagUse) {
|
||||
if (MI.findRegisterDefOperand(X86::EFLAGS)) {
|
||||
// If EFLAGS are defined, it's as-if they were killed. We can stop
|
||||
// scanning here.
|
||||
//
|
||||
// NB!!! Many instructions only modify some flags. LLVM currently
|
||||
// models this as clobbering all flags, but if that ever changes this
|
||||
// will need to be carefully updated to handle that more complex
|
||||
// logic.
|
||||
FlagsKilled = true;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << " Rewriting use: "; MI.dump());
|
||||
|
||||
// Check the kill flag before we rewrite as that may change it.
|
||||
if (FlagUse->isKill())
|
||||
FlagsKilled = true;
|
||||
|
||||
// Once we encounter a branch, the rest of the instructions must also be
|
||||
// branches. We can't rewrite in place here, so we handle them below.
|
||||
//
|
||||
// Note that we don't have to handle tail calls here, even conditional
|
||||
// tail calls, as those are not introduced into the X86 MI until post-RA
|
||||
// branch folding or block placement. As a consequence, we get to deal
|
||||
// with the simpler formulation of conditional branches followed by tail
|
||||
// calls.
|
||||
if (X86::getCondFromBranchOpc(MI.getOpcode()) != X86::COND_INVALID) {
|
||||
auto JmpIt = MI.getIterator();
|
||||
do {
|
||||
JmpIs.push_back(&*JmpIt);
|
||||
++JmpIt;
|
||||
} while (JmpIt != MBB.instr_end() &&
|
||||
X86::getCondFromBranchOpc(JmpIt->getOpcode()) !=
|
||||
X86::COND_INVALID);
|
||||
break;
|
||||
}
|
||||
|
||||
// Otherwise we can just rewrite in-place.
|
||||
if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) {
|
||||
rewriteCMov(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
|
||||
} else if (X86::getCondFromSETOpc(MI.getOpcode()) != X86::COND_INVALID) {
|
||||
rewriteSetCC(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
|
||||
} else if (MI.getOpcode() == TargetOpcode::COPY) {
|
||||
rewriteCopy(MI, *FlagUse, CopyDefI);
|
||||
} else {
|
||||
// We assume that arithmetic instructions that use flags also def them.
|
||||
assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
|
||||
"Expected a def of EFLAGS for this instruction!");
|
||||
|
||||
// NB!!! Several arithmetic instructions only *partially* update
|
||||
// flags. Theoretically, we could generate MI code sequences that
|
||||
// would rely on this fact and observe different flags independently.
|
||||
// But currently LLVM models all of these instructions as clobbering
|
||||
// all the flags in an undef way. We rely on that to simplify the
|
||||
// logic.
|
||||
FlagsKilled = true;
|
||||
|
||||
rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
|
||||
break;
|
||||
}
|
||||
|
||||
// If this was the last use of the flags, we're done.
|
||||
if (FlagsKilled)
|
||||
break;
|
||||
}
|
||||
|
||||
// If we didn't find a kill (or equivalent) check that the flags don't
|
||||
// live-out of the basic block. Currently we don't support lowering copies
|
||||
// of flags that live out in this fashion.
|
||||
if (!FlagsKilled &&
|
||||
llvm::any_of(MBB.successors(), [](MachineBasicBlock *SuccMBB) {
|
||||
return SuccMBB->isLiveIn(X86::EFLAGS);
|
||||
})) {
|
||||
DEBUG({
|
||||
dbgs() << "ERROR: Found a copied EFLAGS live-out from basic block:\n"
|
||||
<< "----\n";
|
||||
MBB.dump();
|
||||
dbgs() << "----\n"
|
||||
<< "ERROR: Cannot lower this EFLAGS copy!\n";
|
||||
});
|
||||
report_fatal_error(
|
||||
"Cannot lower EFLAGS copy that lives out of a basic block!");
|
||||
}
|
||||
|
||||
// Now rewrite the jumps that use the flags. These we handle specially
|
||||
// because if there are multiple jumps we'll have to do surgery on the CFG.
|
||||
for (MachineInstr *JmpI : JmpIs) {
|
||||
// Past the first jump we need to split the blocks apart.
|
||||
if (JmpI != JmpIs.front())
|
||||
splitBlock(*JmpI->getParent(), *JmpI, *TII);
|
||||
|
||||
rewriteCondJmp(TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
|
||||
}
|
||||
|
||||
// FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
|
||||
// the copy's def operand is itself a kill.
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
for (MachineBasicBlock &MBB : MF)
|
||||
for (MachineInstr &MI : MBB)
|
||||
if (MI.getOpcode() == TargetOpcode::COPY &&
|
||||
(MI.getOperand(0).getReg() == X86::EFLAGS ||
|
||||
MI.getOperand(1).getReg() == X86::EFLAGS)) {
|
||||
DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: "; MI.dump());
|
||||
llvm_unreachable("Unlowered EFLAGS copy!");
|
||||
}
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Collect any conditions that have already been set in registers so that we
|
||||
/// can re-use them rather than adding duplicates.
|
||||
CondRegArray
X86FlagsCopyLoweringPass::collectCondsInRegs(MachineBasicBlock &MBB,
                                             MachineInstr &CopyDefI) {
  CondRegArray SavedConds = {};

  // Walk from the instruction just before the copy's def back towards the
  // start of the block.
  auto Preceding = llvm::make_range(MBB.instr_begin(), CopyDefI.getIterator());
  for (MachineInstr &Candidate : llvm::reverse(Preceding)) {
    // Remember any setCC whose destination is a virtual register.
    X86::CondCode SetCond = X86::getCondFromSETOpc(Candidate.getOpcode());
    if (SetCond != X86::COND_INVALID) {
      MachineOperand &Dst = Candidate.getOperand(0);
      if (Dst.isReg() && TRI->isVirtualRegister(Dst.getReg()))
        SavedConds[SetCond] = Dst.getReg();
    }

    // Anything before the nearest definition of EFLAGS would describe a
    // different flag state, so the scan ends here.
    if (Candidate.findRegisterDefOperand(X86::EFLAGS))
      break;
  }

  return SavedConds;
}
|
||||
|
||||
/// Save condition \p Cond into a fresh virtual register of class PromoteRC by
/// emitting the matching setCC before \p TestPos in \p TestMBB.
///
/// \returns the virtual register that receives the setCC result.
unsigned X86FlagsCopyLoweringPass::promoteCondToReg(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, X86::CondCode Cond) {
  const unsigned SavedReg = MRI->createVirtualRegister(PromoteRC);
  const auto &SetDesc = TII->get(X86::getSETFromCond(Cond));
  auto SetCC = BuildMI(TestMBB, TestPos, TestLoc, SetDesc, SavedReg);
  (void)SetCC; // Only referenced by the debug output below.
  DEBUG(dbgs() << " save cond: "; SetCC->dump());
  ++NumSetCCsInserted;
  return SavedReg;
}
|
||||
|
||||
/// Find a register that holds \p Cond or its opposite condition.
///
/// If neither is recorded in \p CondRegs, \p Cond itself is promoted to a
/// register (and recorded in \p CondRegs) first.
///
/// \returns the register and a flag that is true when the register holds the
/// opposite of \p Cond.
std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) {
  unsigned &Direct = CondRegs[Cond];
  const unsigned Inverse = CondRegs[X86::GetOppositeBranchCondition(Cond)];

  const bool HaveEither = Direct || Inverse;
  if (!HaveEither)
    Direct = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);

  // Prefer the register holding the condition itself when one exists.
  return Direct ? std::make_pair(Direct, false)
                : std::make_pair(Inverse, true);
}
|
||||
|
||||
/// Emit a TEST8ri of \p Reg against -1 before \p Pos in \p MBB.
void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator Pos,
                                          DebugLoc Loc, unsigned Reg) {
  // The test is emitted in register/immediate form against -1. That lets
  // register allocation fold a memory operand when needed (expected to be
  // common given where this is emitted), and hopefully still permits the
  // shorter `testb %reg, %reg` encoding when that is equivalent.
  const auto &TestDesc = TII->get(X86::TEST8ri);
  auto TestMI = BuildMI(MBB, Pos, Loc, TestDesc);
  TestMI.addReg(Reg);
  TestMI.addImm(-1);
  DEBUG(dbgs() << " test cond: "; TestMI->dump());
  ++NumTestsInserted;
}
|
||||
|
||||
/// Rewrite a flag-consuming arithmetic instruction \p MI so that the flag it
/// reads is re-created from a saved condition register just before it.
void X86FlagsCopyLoweringPass::rewriteArithmetic(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, MachineInstr &MI, MachineOperand &FlagUse,
    CondRegArray &CondRegs) {
  // ADOX is the only supported mnemonic that reads OF; every other one (ADC,
  // ADCX, RCL, RCR, SBB) reads CF.
  const bool ReadsOverflow =
      getMnemonicFromOpcode(MI.getOpcode()) == FlagArithMnemonic::ADOX;

  // The condition to keep in a register: OF == 1 or CF == 1.
  const X86::CondCode FlagCond = ReadsOverflow ? X86::COND_O : X86::COND_B;

  // Value that, added to the saved 0/1 flag, regenerates the flag: 127 + 1
  // overflows in the signed domain, 255 + 1 carries out of the 8-bit value.
  const int FlagAddend = ReadsOverflow ? 127 : 255;

  // Exactly this condition is required (not its inverse) to keep the
  // arithmetic that re-materializes the flag simple.
  unsigned &SavedReg = CondRegs[FlagCond];
  if (!SavedReg)
    SavedReg = promoteCondToReg(TestMBB, TestPos, TestLoc, FlagCond);

  MachineBasicBlock &UseMBB = *MI.getParent();

  // Emit the add immediately before MI; its register result is dead, only the
  // flags it produces matter.
  const unsigned ScratchReg = MRI->createVirtualRegister(PromoteRC);
  auto AddMI = BuildMI(UseMBB, MI.getIterator(), MI.getDebugLoc(),
                       TII->get(X86::ADD8ri))
                   .addDef(ScratchReg, RegState::Dead)
                   .addReg(SavedReg)
                   .addImm(FlagAddend);
  (void)AddMI; // Only referenced by the debug output below.
  DEBUG(dbgs() << " add cond: "; AddMI->dump());
  ++NumAddsInserted;
  FlagUse.setIsKill(true);
}
|
||||
|
||||
/// Rewrite the conditional move \p CMovI to depend on a test of a saved
/// condition register instead of the copied flags.
void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB,
                                           MachineBasicBlock::iterator TestPos,
                                           DebugLoc TestLoc,
                                           MachineInstr &CMovI,
                                           MachineOperand &FlagUse,
                                           CondRegArray &CondRegs) {
  // Obtain a register holding the cmov's condition or its inverse.
  const X86::CondCode OrigCond = X86::getCondFromCMovOpc(CMovI.getOpcode());
  const std::pair<unsigned, bool> Saved =
      getCondOrInverseInReg(TestMBB, TestPos, TestLoc, OrigCond, CondRegs);
  const unsigned SavedReg = Saved.first;
  const bool IsInverse = Saved.second;

  // Test the saved register right before the cmov.
  MachineBasicBlock &UseMBB = *CMovI.getParent();
  insertTest(UseMBB, CMovI.getIterator(), CMovI.getDebugLoc(), SavedReg);

  // Switch the cmov to key off ZF from that test, keeping its register size
  // and memory-operand form, then mark its flag use as a kill.
  auto &DstRC = *MRI->getRegClass(CMovI.getOperand(0).getReg());
  const X86::CondCode TestCond = IsInverse ? X86::COND_E : X86::COND_NE;
  CMovI.setDesc(TII->get(X86::getCMovFromCond(
      TestCond, TRI->getRegSizeInBits(DstRC) / 8,
      !CMovI.memoperands_empty())));
  FlagUse.setIsKill(true);
  DEBUG(dbgs() << " fixed cmov: "; CMovI.dump());
}
|
||||
|
||||
/// Rewrite the conditional jump \p JmpI to branch on a test of a saved
/// condition register instead of the copied flags.
void X86FlagsCopyLoweringPass::rewriteCondJmp(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, MachineInstr &JmpI, CondRegArray &CondRegs) {
  // Obtain a register holding the jump's condition or its inverse.
  const X86::CondCode OrigCond = X86::getCondFromBranchOpc(JmpI.getOpcode());
  const std::pair<unsigned, bool> Saved =
      getCondOrInverseInReg(TestMBB, TestPos, TestLoc, OrigCond, CondRegs);
  const unsigned SavedReg = Saved.first;
  const bool IsInverse = Saved.second;

  // Test the saved register right before the jump.
  MachineBasicBlock &UseMBB = *JmpI.getParent();
  insertTest(UseMBB, JmpI.getIterator(), JmpI.getDebugLoc(), SavedReg);

  // Make the jump key off ZF from that test and mark its flag use as a kill.
  const X86::CondCode TestCond = IsInverse ? X86::COND_E : X86::COND_NE;
  JmpI.setDesc(TII->get(X86::GetCondBranchFromCond(TestCond)));
  const int ImplicitFlagsOperand = 1;
  JmpI.getOperand(ImplicitFlagsOperand).setIsKill(true);
  DEBUG(dbgs() << " fixed jCC: "; JmpI.dump());
}
|
||||
|
||||
/// Rewrite a COPY that reads the copied flags: every use of its result is
/// redirected to the register defined by \p CopyDefI, and the COPY \p MI is
/// erased. \p FlagUse is not needed for this rewrite.
void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
                                           MachineOperand &FlagUse,
                                           MachineInstr &CopyDefI) {
  // Substitute the original copy's def for this copy's result.
  const unsigned RedundantReg = MI.getOperand(0).getReg();
  const unsigned OriginalReg = CopyDefI.getOperand(0).getReg();
  MRI->replaceRegWith(RedundantReg, OriginalReg);
  MI.eraseFromParent();
}
|
||||
|
||||
/// Replace a setCC with the register that already holds (or is now made to
/// hold) the same condition, and delete the setCC.
///
/// \param TestMBB, TestPos, TestLoc  Forwarded to promoteCondToReg when no
///        register has been recorded for the condition yet.
/// \param SetCCI    The setCC instruction to remove.
/// \param FlagUse   Not referenced by this rewrite.
/// \param CondRegs  Per-condition register table; the entry for this
///        condition is filled in when it is still zero.
void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
                                            MachineBasicBlock::iterator TestPos,
                                            DebugLoc TestLoc,
                                            MachineInstr &SetCCI,
                                            MachineOperand &FlagUse,
                                            CondRegArray &CondRegs) {
  const X86::CondCode SetCond = X86::getCondFromSETOpc(SetCCI.getOpcode());

  // Only the exact condition is looked up here: switching to the inverse
  // would need non-trivial analysis of the setCC's users. Duplicates that
  // could have been avoided are expected to have been avoided already.
  // The slot is taken by reference so a newly promoted register is recorded
  // in the table.
  unsigned &CachedReg = CondRegs[SetCond];
  if (CachedReg == 0)
    CachedReg = promoteCondToReg(TestMBB, TestPos, TestLoc, SetCond);

  // With the register in hand the rewrite is a plain substitution followed by
  // erasing the setCC.
  const unsigned SetCCResult = SetCCI.getOperand(0).getReg();
  MRI->replaceRegWith(SetCCResult, CachedReg);
  SetCCI.eraseFromParent();
}
|
@ -27781,16 +27781,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
|
||||
MI.getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64;
|
||||
unsigned Pop = MI.getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r;
|
||||
MachineInstr *Push = BuildMI(*BB, MI, DL, TII->get(PushF));
|
||||
// Permit reads of the EFLAGS and DF registers without them being defined.
|
||||
// Permit reads of the FLAGS register without it being defined.
|
||||
// This intrinsic exists to read external processor state in flags, such as
|
||||
// the trap flag, interrupt flag, and direction flag, none of which are
|
||||
// modeled by the backend.
|
||||
assert(Push->getOperand(2).getReg() == X86::EFLAGS &&
|
||||
"Unexpected register in operand!");
|
||||
Push->getOperand(2).setIsUndef();
|
||||
assert(Push->getOperand(3).getReg() == X86::DF &&
|
||||
"Unexpected register in operand!");
|
||||
Push->getOperand(3).setIsUndef();
|
||||
BuildMI(*BB, MI, DL, TII->get(Pop), MI.getOperand(0).getReg());
|
||||
|
||||
MI.eraseFromParent(); // The pseudo is gone now.
|
||||
@ -37834,6 +37829,25 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
|
||||
}
|
||||
}
|
||||
|
||||
/// This function checks if any of the users of EFLAGS copies the EFLAGS. We
|
||||
/// know that the code that lowers COPY of EFLAGS has to use the stack, and if
|
||||
/// we don't adjust the stack we clobber the first frame index.
|
||||
/// See X86InstrInfo::copyPhysReg.
|
||||
static bool hasCopyImplyingStackAdjustment(const MachineFunction &MF) {
|
||||
const MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
return any_of(MRI.reg_instructions(X86::EFLAGS),
|
||||
[](const MachineInstr &RI) { return RI.isCopy(); });
|
||||
}
|
||||
|
||||
/// Flag the function's frame info when it contains a copy that implies a
/// stack adjustment, then run the generic TargetLoweringBase finalization.
void X86TargetLowering::finalizeLowering(MachineFunction &MF) const {
  // The flag is only ever set here, never cleared.
  const bool NeedsStackAdjustment = hasCopyImplyingStackAdjustment(MF);
  if (NeedsStackAdjustment)
    MF.getFrameInfo().setHasCopyImplyingStackAdjustment(true);

  TargetLoweringBase::finalizeLowering(MF);
}
|
||||
|
||||
/// This method queries the target whether it is beneficial for dag combiner to
|
||||
/// promote the specified node. If true, it should return the desired promotion
|
||||
/// type by reference.
|
||||
|
@ -1100,6 +1100,8 @@ namespace llvm {
|
||||
unsigned Factor) const override;
|
||||
|
||||
|
||||
void finalizeLowering(MachineFunction &MF) const override;
|
||||
|
||||
protected:
|
||||
std::pair<const TargetRegisterClass *, uint8_t>
|
||||
findRepresentativeClass(const TargetRegisterInfo *TRI,
|
||||
|
@ -473,7 +473,7 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
|
||||
ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
|
||||
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
|
||||
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
|
||||
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF],
|
||||
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
|
||||
usesCustomInserter = 1, Uses = [ESP, SSP] in {
|
||||
def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
|
||||
"# TLS_addr32",
|
||||
@ -493,7 +493,7 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
|
||||
ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
|
||||
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
|
||||
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
|
||||
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF],
|
||||
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
|
||||
usesCustomInserter = 1, Uses = [RSP, SSP] in {
|
||||
def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
|
||||
"# TLS_addr64",
|
||||
@ -509,7 +509,7 @@ def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
|
||||
// For i386, the address of the thunk is passed on the stack, on return the
|
||||
// address of the variable is in %eax. %ecx is trashed during the function
|
||||
// call. All other registers are preserved.
|
||||
let Defs = [EAX, ECX, EFLAGS, DF],
|
||||
let Defs = [EAX, ECX, EFLAGS],
|
||||
Uses = [ESP, SSP],
|
||||
usesCustomInserter = 1 in
|
||||
def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
|
||||
@ -522,7 +522,7 @@ def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
|
||||
// %rdi. The lowering will do the right thing with RDI.
|
||||
// On return the address of the variable is in %rax. All other
|
||||
// registers are preserved.
|
||||
let Defs = [RAX, EFLAGS, DF],
|
||||
let Defs = [RAX, EFLAGS],
|
||||
Uses = [RSP, SSP],
|
||||
usesCustomInserter = 1 in
|
||||
def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
|
||||
|
@ -5782,7 +5782,7 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
|
||||
return false;
|
||||
}
|
||||
|
||||
X86::CondCode X86::getCondFromBranchOpc(unsigned BrOpc) {
|
||||
static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) {
|
||||
switch (BrOpc) {
|
||||
default: return X86::COND_INVALID;
|
||||
case X86::JE_1: return X86::COND_E;
|
||||
@ -5805,7 +5805,7 @@ X86::CondCode X86::getCondFromBranchOpc(unsigned BrOpc) {
|
||||
}
|
||||
|
||||
/// Return condition code of a SET opcode.
|
||||
X86::CondCode X86::getCondFromSETOpc(unsigned Opc) {
|
||||
static X86::CondCode getCondFromSETOpc(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
default: return X86::COND_INVALID;
|
||||
case X86::SETAr: case X86::SETAm: return X86::COND_A;
|
||||
@ -6130,7 +6130,7 @@ void X86InstrInfo::replaceBranchWithTailCall(
|
||||
if (!I->isBranch())
|
||||
assert(0 && "Can't find the branch to replace!");
|
||||
|
||||
X86::CondCode CC = X86::getCondFromBranchOpc(I->getOpcode());
|
||||
X86::CondCode CC = getCondFromBranchOpc(I->getOpcode());
|
||||
assert(BranchCond.size() == 1);
|
||||
if (CC != BranchCond[0].getImm())
|
||||
continue;
|
||||
@ -6237,7 +6237,7 @@ bool X86InstrInfo::AnalyzeBranchImpl(
|
||||
}
|
||||
|
||||
// Handle conditional branches.
|
||||
X86::CondCode BranchCode = X86::getCondFromBranchOpc(I->getOpcode());
|
||||
X86::CondCode BranchCode = getCondFromBranchOpc(I->getOpcode());
|
||||
if (BranchCode == X86::COND_INVALID)
|
||||
return true; // Can't handle indirect branch.
|
||||
|
||||
@ -6433,7 +6433,7 @@ unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB,
|
||||
if (I->isDebugValue())
|
||||
continue;
|
||||
if (I->getOpcode() != X86::JMP_1 &&
|
||||
X86::getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
|
||||
getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
|
||||
break;
|
||||
// Remove the branch.
|
||||
I->eraseFromParent();
|
||||
@ -6710,12 +6710,102 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
||||
return;
|
||||
}
|
||||
|
||||
if (SrcReg == X86::EFLAGS || DestReg == X86::EFLAGS) {
|
||||
// FIXME: We use a fatal error here because historically LLVM has tried
|
||||
// lower some of these physreg copies and we want to ensure we get
|
||||
// reasonable bug reports if someone encounters a case no other testing
|
||||
// found. This path should be removed after the LLVM 7 release.
|
||||
report_fatal_error("Unable to copy EFLAGS physical register!");
|
||||
bool FromEFLAGS = SrcReg == X86::EFLAGS;
|
||||
bool ToEFLAGS = DestReg == X86::EFLAGS;
|
||||
int Reg = FromEFLAGS ? DestReg : SrcReg;
|
||||
bool is32 = X86::GR32RegClass.contains(Reg);
|
||||
bool is64 = X86::GR64RegClass.contains(Reg);
|
||||
|
||||
if ((FromEFLAGS || ToEFLAGS) && (is32 || is64)) {
|
||||
int Mov = is64 ? X86::MOV64rr : X86::MOV32rr;
|
||||
int Push = is64 ? X86::PUSH64r : X86::PUSH32r;
|
||||
int PushF = is64 ? X86::PUSHF64 : X86::PUSHF32;
|
||||
int Pop = is64 ? X86::POP64r : X86::POP32r;
|
||||
int PopF = is64 ? X86::POPF64 : X86::POPF32;
|
||||
int AX = is64 ? X86::RAX : X86::EAX;
|
||||
|
||||
if (!Subtarget.hasLAHFSAHF()) {
|
||||
assert(Subtarget.is64Bit() &&
|
||||
"Not having LAHF/SAHF only happens on 64-bit.");
|
||||
// Moving EFLAGS to / from another register requires a push and a pop.
|
||||
// Notice that we have to adjust the stack if we don't want to clobber the
|
||||
// first frame index. See X86FrameLowering.cpp - usesTheStack.
|
||||
if (FromEFLAGS) {
|
||||
BuildMI(MBB, MI, DL, get(PushF));
|
||||
BuildMI(MBB, MI, DL, get(Pop), DestReg);
|
||||
}
|
||||
if (ToEFLAGS) {
|
||||
BuildMI(MBB, MI, DL, get(Push))
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
BuildMI(MBB, MI, DL, get(PopF));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// The flags need to be saved, but saving EFLAGS with PUSHF/POPF is
|
||||
// inefficient. Instead:
|
||||
// - Save the overflow flag OF into AL using SETO, and restore it using a
|
||||
// signed 8-bit addition of AL and INT8_MAX.
|
||||
// - Save/restore the bottom 8 EFLAGS bits (CF, PF, AF, ZF, SF) to/from AH
|
||||
// using LAHF/SAHF.
|
||||
// - When RAX/EAX is live and isn't the destination register, make sure it
|
||||
// isn't clobbered by PUSH/POP'ing it before and after saving/restoring
|
||||
// the flags.
|
||||
// This approach is ~2.25x faster than using PUSHF/POPF.
|
||||
//
|
||||
// This is still somewhat inefficient because we don't know which flags are
|
||||
// actually live inside EFLAGS. Were we able to do a single SETcc instead of
|
||||
// SETO+LAHF / ADDB+SAHF the code could be 1.02x faster.
|
||||
//
|
||||
// PUSHF/POPF is also potentially incorrect because it affects other flags
|
||||
// such as TF/IF/DF, which LLVM doesn't model.
|
||||
//
|
||||
// Notice that we have to adjust the stack if we don't want to clobber the
|
||||
// first frame index.
|
||||
// See X86ISelLowering.cpp - X86::hasCopyImplyingStackAdjustment.
|
||||
|
||||
const TargetRegisterInfo &TRI = getRegisterInfo();
|
||||
MachineBasicBlock::LivenessQueryResult LQR =
|
||||
MBB.computeRegisterLiveness(&TRI, AX, MI);
|
||||
// We do not want to save and restore AX if we do not have to.
|
||||
// Moreover, if we do so whereas AX is dead, we would need to set
|
||||
// an undef flag on the use of AX, otherwise the verifier will
|
||||
// complain that we read an undef value.
|
||||
// We do not want to change the behavior of the machine verifier
|
||||
// as this is usually wrong to read an undef value.
|
||||
if (MachineBasicBlock::LQR_Unknown == LQR) {
|
||||
LivePhysRegs LPR(TRI);
|
||||
LPR.addLiveOuts(MBB);
|
||||
MachineBasicBlock::iterator I = MBB.end();
|
||||
while (I != MI) {
|
||||
--I;
|
||||
LPR.stepBackward(*I);
|
||||
}
|
||||
// AX contains the top most register in the aliasing hierarchy.
|
||||
// It may not be live, but one of its aliases may be.
|
||||
for (MCRegAliasIterator AI(AX, &TRI, true);
|
||||
AI.isValid() && LQR != MachineBasicBlock::LQR_Live; ++AI)
|
||||
LQR = LPR.contains(*AI) ? MachineBasicBlock::LQR_Live
|
||||
: MachineBasicBlock::LQR_Dead;
|
||||
}
|
||||
bool AXDead = (Reg == AX) || (MachineBasicBlock::LQR_Dead == LQR);
|
||||
if (!AXDead)
|
||||
BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true));
|
||||
if (FromEFLAGS) {
|
||||
BuildMI(MBB, MI, DL, get(X86::SETOr), X86::AL);
|
||||
BuildMI(MBB, MI, DL, get(X86::LAHF));
|
||||
BuildMI(MBB, MI, DL, get(Mov), Reg).addReg(AX);
|
||||
}
|
||||
if (ToEFLAGS) {
|
||||
BuildMI(MBB, MI, DL, get(Mov), AX).addReg(Reg, getKillRegState(KillSrc));
|
||||
BuildMI(MBB, MI, DL, get(X86::ADD8ri), X86::AL)
|
||||
.addReg(X86::AL)
|
||||
.addImm(INT8_MAX);
|
||||
BuildMI(MBB, MI, DL, get(X86::SAHF));
|
||||
}
|
||||
if (!AXDead)
|
||||
BuildMI(MBB, MI, DL, get(Pop), AX);
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
|
||||
@ -7375,9 +7465,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
|
||||
if (IsCmpZero || IsSwapped) {
|
||||
// We decode the condition code from opcode.
|
||||
if (Instr.isBranch())
|
||||
OldCC = X86::getCondFromBranchOpc(Instr.getOpcode());
|
||||
OldCC = getCondFromBranchOpc(Instr.getOpcode());
|
||||
else {
|
||||
OldCC = X86::getCondFromSETOpc(Instr.getOpcode());
|
||||
OldCC = getCondFromSETOpc(Instr.getOpcode());
|
||||
if (OldCC != X86::COND_INVALID)
|
||||
OpcIsSET = true;
|
||||
else
|
||||
@ -9323,9 +9413,8 @@ bool X86InstrInfo::
|
||||
isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
|
||||
// FIXME: Return false for x87 stack register classes for now. We can't
|
||||
// allow any loads of these registers before FpGet_ST0_80.
|
||||
return !(RC == &X86::CCRRegClass || RC == &X86::DFCCRRegClass ||
|
||||
RC == &X86::RFP32RegClass || RC == &X86::RFP64RegClass ||
|
||||
RC == &X86::RFP80RegClass);
|
||||
return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass ||
|
||||
RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
|
||||
}
|
||||
|
||||
/// Return a virtual register initialized with the
|
||||
|
@ -77,12 +77,6 @@ unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false);
|
||||
unsigned getCMovFromCond(CondCode CC, unsigned RegBytes,
|
||||
bool HasMemoryOperand = false);
|
||||
|
||||
// Turn jCC opcode into condition code.
|
||||
CondCode getCondFromBranchOpc(unsigned Opc);
|
||||
|
||||
// Turn setCC opcode into condition code.
|
||||
CondCode getCondFromSETOpc(unsigned Opc);
|
||||
|
||||
// Turn CMov opcode into condition code.
|
||||
CondCode getCondFromCMovOpc(unsigned Opc);
|
||||
|
||||
|
@ -1235,18 +1235,18 @@ let mayLoad = 1, mayStore = 1, usesCustomInserter = 1,
|
||||
|
||||
let mayLoad = 1, mayStore = 1, usesCustomInserter = 1,
|
||||
SchedRW = [WriteRMW] in {
|
||||
let Defs = [ESP, EFLAGS, DF], Uses = [ESP] in
|
||||
let Defs = [ESP, EFLAGS], Uses = [ESP] in
|
||||
def WRFLAGS32 : PseudoI<(outs), (ins GR32:$src),
|
||||
[(int_x86_flags_write_u32 GR32:$src)]>,
|
||||
Requires<[Not64BitMode]>;
|
||||
|
||||
let Defs = [RSP, EFLAGS, DF], Uses = [RSP] in
|
||||
let Defs = [RSP, EFLAGS], Uses = [RSP] in
|
||||
def WRFLAGS64 : PseudoI<(outs), (ins GR64:$src),
|
||||
[(int_x86_flags_write_u64 GR64:$src)]>,
|
||||
Requires<[In64BitMode]>;
|
||||
}
|
||||
|
||||
let Defs = [ESP, EFLAGS, DF], Uses = [ESP], mayLoad = 1, hasSideEffects=0,
|
||||
let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, hasSideEffects=0,
|
||||
SchedRW = [WriteLoad] in {
|
||||
def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>,
|
||||
OpSize16;
|
||||
@ -1254,7 +1254,7 @@ def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>,
|
||||
OpSize32, Requires<[Not64BitMode]>;
|
||||
}
|
||||
|
||||
let Defs = [ESP], Uses = [ESP, EFLAGS, DF], mayStore = 1, hasSideEffects=0,
|
||||
let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, hasSideEffects=0,
|
||||
SchedRW = [WriteStore] in {
|
||||
def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>,
|
||||
OpSize16;
|
||||
@ -1294,10 +1294,10 @@ def PUSH64i32 : Ii32S<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
|
||||
Requires<[In64BitMode]>;
|
||||
}
|
||||
|
||||
let Defs = [RSP, EFLAGS, DF], Uses = [RSP], mayLoad = 1, hasSideEffects=0 in
|
||||
let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, hasSideEffects=0 in
|
||||
def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", [], IIC_POP_FD>,
|
||||
OpSize32, Requires<[In64BitMode]>, Sched<[WriteLoad]>;
|
||||
let Defs = [RSP], Uses = [RSP, EFLAGS, DF], mayStore = 1, hasSideEffects=0 in
|
||||
let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, hasSideEffects=0 in
|
||||
def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>,
|
||||
OpSize32, Requires<[In64BitMode]>, Sched<[WriteStore]>;
|
||||
|
||||
@ -1382,7 +1382,8 @@ def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
|
||||
} // Defs = [EFLAGS]
|
||||
|
||||
let SchedRW = [WriteMicrocoded] in {
|
||||
let Defs = [EDI,ESI], Uses = [EDI,ESI,DF] in {
|
||||
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
|
||||
let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
|
||||
def MOVSB : I<0xA4, RawFrmDstSrc, (outs), (ins dstidx8:$dst, srcidx8:$src),
|
||||
"movsb\t{$src, $dst|$dst, $src}", [], IIC_MOVS>;
|
||||
def MOVSW : I<0xA5, RawFrmDstSrc, (outs), (ins dstidx16:$dst, srcidx16:$src),
|
||||
@ -1393,33 +1394,36 @@ def MOVSQ : RI<0xA5, RawFrmDstSrc, (outs), (ins dstidx64:$dst, srcidx64:$src),
|
||||
"movsq\t{$src, $dst|$dst, $src}", [], IIC_MOVS>;
|
||||
}
|
||||
|
||||
let Defs = [EDI], Uses = [AL,EDI,DF] in
|
||||
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
|
||||
let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in
|
||||
def STOSB : I<0xAA, RawFrmDst, (outs), (ins dstidx8:$dst),
|
||||
"stosb\t{%al, $dst|$dst, al}", [], IIC_STOS>;
|
||||
let Defs = [EDI], Uses = [AX,EDI,DF] in
|
||||
let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in
|
||||
def STOSW : I<0xAB, RawFrmDst, (outs), (ins dstidx16:$dst),
|
||||
"stosw\t{%ax, $dst|$dst, ax}", [], IIC_STOS>, OpSize16;
|
||||
let Defs = [EDI], Uses = [EAX,EDI,DF] in
|
||||
let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in
|
||||
def STOSL : I<0xAB, RawFrmDst, (outs), (ins dstidx32:$dst),
|
||||
"stos{l|d}\t{%eax, $dst|$dst, eax}", [], IIC_STOS>, OpSize32;
|
||||
let Defs = [RDI], Uses = [RAX,RDI,DF] in
|
||||
let Defs = [RDI], Uses = [RAX,RDI,EFLAGS] in
|
||||
def STOSQ : RI<0xAB, RawFrmDst, (outs), (ins dstidx64:$dst),
|
||||
"stosq\t{%rax, $dst|$dst, rax}", [], IIC_STOS>;
|
||||
|
||||
let Defs = [EDI,EFLAGS], Uses = [AL,EDI,DF] in
|
||||
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
|
||||
let Defs = [EDI,EFLAGS], Uses = [AL,EDI,EFLAGS] in
|
||||
def SCASB : I<0xAE, RawFrmDst, (outs), (ins dstidx8:$dst),
|
||||
"scasb\t{$dst, %al|al, $dst}", [], IIC_SCAS>;
|
||||
let Defs = [EDI,EFLAGS], Uses = [AX,EDI,DF] in
|
||||
let Defs = [EDI,EFLAGS], Uses = [AX,EDI,EFLAGS] in
|
||||
def SCASW : I<0xAF, RawFrmDst, (outs), (ins dstidx16:$dst),
|
||||
"scasw\t{$dst, %ax|ax, $dst}", [], IIC_SCAS>, OpSize16;
|
||||
let Defs = [EDI,EFLAGS], Uses = [EAX,EDI,DF] in
|
||||
let Defs = [EDI,EFLAGS], Uses = [EAX,EDI,EFLAGS] in
|
||||
def SCASL : I<0xAF, RawFrmDst, (outs), (ins dstidx32:$dst),
|
||||
"scas{l|d}\t{$dst, %eax|eax, $dst}", [], IIC_SCAS>, OpSize32;
|
||||
let Defs = [EDI,EFLAGS], Uses = [RAX,EDI,DF] in
|
||||
let Defs = [EDI,EFLAGS], Uses = [RAX,EDI,EFLAGS] in
|
||||
def SCASQ : RI<0xAF, RawFrmDst, (outs), (ins dstidx64:$dst),
|
||||
"scasq\t{$dst, %rax|rax, $dst}", [], IIC_SCAS>;
|
||||
|
||||
let Defs = [EDI,ESI,EFLAGS], Uses = [EDI,ESI,DF] in {
|
||||
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
|
||||
let Defs = [EDI,ESI,EFLAGS], Uses = [EDI,ESI,EFLAGS] in {
|
||||
def CMPSB : I<0xA6, RawFrmDstSrc, (outs), (ins dstidx8:$dst, srcidx8:$src),
|
||||
"cmpsb\t{$dst, $src|$src, $dst}", [], IIC_CMPS>;
|
||||
def CMPSW : I<0xA7, RawFrmDstSrc, (outs), (ins dstidx16:$dst, srcidx16:$src),
|
||||
@ -2066,7 +2070,8 @@ def DATA32_PREFIX : I<0x66, RawFrm, (outs), (ins), "data32", [], IIC_NOP>,
|
||||
} // SchedRW
|
||||
|
||||
// Repeat string operation instruction prefixes
|
||||
let Defs = [ECX], Uses = [ECX,DF], SchedRW = [WriteMicrocoded] in {
|
||||
// These use the DF flag in the EFLAGS register to inc or dec ECX
|
||||
let Defs = [ECX], Uses = [ECX,EFLAGS], SchedRW = [WriteMicrocoded] in {
|
||||
// Repeat (used with INS, OUTS, MOVS, LODS and STOS)
|
||||
def REP_PREFIX : I<0xF3, RawFrm, (outs), (ins), "rep", []>;
|
||||
// Repeat while not equal (used with CMPS and SCAS)
|
||||
@ -2075,22 +2080,24 @@ def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>;
|
||||
|
||||
// String manipulation instructions
|
||||
let SchedRW = [WriteMicrocoded] in {
|
||||
let Defs = [AL,ESI], Uses = [ESI,DF] in
|
||||
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
|
||||
let Defs = [AL,ESI], Uses = [ESI,EFLAGS] in
|
||||
def LODSB : I<0xAC, RawFrmSrc, (outs), (ins srcidx8:$src),
|
||||
"lodsb\t{$src, %al|al, $src}", [], IIC_LODS>;
|
||||
let Defs = [AX,ESI], Uses = [ESI,DF] in
|
||||
let Defs = [AX,ESI], Uses = [ESI,EFLAGS] in
|
||||
def LODSW : I<0xAD, RawFrmSrc, (outs), (ins srcidx16:$src),
|
||||
"lodsw\t{$src, %ax|ax, $src}", [], IIC_LODS>, OpSize16;
|
||||
let Defs = [EAX,ESI], Uses = [ESI,DF] in
|
||||
let Defs = [EAX,ESI], Uses = [ESI,EFLAGS] in
|
||||
def LODSL : I<0xAD, RawFrmSrc, (outs), (ins srcidx32:$src),
|
||||
"lods{l|d}\t{$src, %eax|eax, $src}", [], IIC_LODS>, OpSize32;
|
||||
let Defs = [RAX,ESI], Uses = [ESI,DF] in
|
||||
let Defs = [RAX,ESI], Uses = [ESI,EFLAGS] in
|
||||
def LODSQ : RI<0xAD, RawFrmSrc, (outs), (ins srcidx64:$src),
|
||||
"lodsq\t{$src, %rax|rax, $src}", [], IIC_LODS>;
|
||||
}
|
||||
|
||||
let SchedRW = [WriteSystem] in {
|
||||
let Defs = [ESI], Uses = [DX,ESI,DF] in {
|
||||
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
|
||||
let Defs = [ESI], Uses = [DX,ESI,EFLAGS] in {
|
||||
def OUTSB : I<0x6E, RawFrmSrc, (outs), (ins srcidx8:$src),
|
||||
"outsb\t{$src, %dx|dx, $src}", [], IIC_OUTS>;
|
||||
def OUTSW : I<0x6F, RawFrmSrc, (outs), (ins srcidx16:$src),
|
||||
@ -2099,7 +2106,8 @@ def OUTSL : I<0x6F, RawFrmSrc, (outs), (ins srcidx32:$src),
|
||||
"outs{l|d}\t{$src, %dx|dx, $src}", [], IIC_OUTS>, OpSize32;
|
||||
}
|
||||
|
||||
let Defs = [EDI], Uses = [DX,EDI,DF] in {
|
||||
// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI
|
||||
let Defs = [EDI], Uses = [DX,EDI,EFLAGS] in {
|
||||
def INSB : I<0x6C, RawFrmDst, (outs), (ins dstidx8:$dst),
|
||||
"insb\t{%dx, $dst|$dst, dx}", [], IIC_INS>;
|
||||
def INSW : I<0x6D, RawFrmDst, (outs), (ins dstidx16:$dst),
|
||||
@ -2109,21 +2117,18 @@ def INSL : I<0x6D, RawFrmDst, (outs), (ins dstidx32:$dst),
|
||||
}
|
||||
}
|
||||
|
||||
// EFLAGS management instructions.
|
||||
let SchedRW = [WriteALU], Defs = [EFLAGS], Uses = [EFLAGS] in {
|
||||
def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", [], IIC_CLC_CMC_STC>;
|
||||
def STC : I<0xF9, RawFrm, (outs), (ins), "stc", [], IIC_CLC_CMC_STC>;
|
||||
def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", [], IIC_CLC_CMC_STC>;
|
||||
}
|
||||
|
||||
// DF management instructions.
|
||||
// FIXME: These are a bit more expensive than CLC and STC. We should consider
|
||||
// adjusting their schedule bucket.
|
||||
let SchedRW = [WriteALU], Defs = [DF] in {
|
||||
// Flag instructions
|
||||
let SchedRW = [WriteALU] in {
|
||||
def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", [], IIC_CLC>;
|
||||
def STC : I<0xF9, RawFrm, (outs), (ins), "stc", [], IIC_STC>;
|
||||
def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", [], IIC_CLI>;
|
||||
def STI : I<0xFB, RawFrm, (outs), (ins), "sti", [], IIC_STI>;
|
||||
def CLD : I<0xFC, RawFrm, (outs), (ins), "cld", [], IIC_CLD>;
|
||||
def STD : I<0xFD, RawFrm, (outs), (ins), "std", [], IIC_STD>;
|
||||
}
|
||||
def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", [], IIC_CMC>;
|
||||
|
||||
def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB;
|
||||
}
|
||||
|
||||
// Table lookup instructions
|
||||
let Uses = [AL,EBX], Defs = [AL], hasSideEffects = 0, mayLoad = 1 in
|
||||
|
@ -692,19 +692,6 @@ let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX] in {
|
||||
} // Uses, Defs
|
||||
} // SchedRW
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// TS flag control instruction.
|
||||
let SchedRW = [WriteSystem] in {
|
||||
def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// IF (inside EFLAGS) management instructions.
|
||||
let SchedRW = [WriteSystem], Uses = [EFLAGS], Defs = [EFLAGS] in {
|
||||
def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", [], IIC_CLI>;
|
||||
def STI : I<0xFB, RawFrm, (outs), (ins), "sti", [], IIC_STI>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// RDPID Instruction
|
||||
let SchedRW = [WriteSystem] in {
|
||||
|
@ -251,19 +251,9 @@ def ST7 : X86Reg<"st(7)", 7>, DwarfRegNum<[40, 19, 18]>;
|
||||
// Floating-point status word
|
||||
def FPSW : X86Reg<"fpsw", 0>;
|
||||
|
||||
// Status flags register.
|
||||
//
|
||||
// Note that some flags that are commonly thought of as part of the status
|
||||
// flags register are modeled separately. Typically this is due to instructions
|
||||
// reading and updating those flags independently of all the others. We don't
|
||||
// want to create false dependencies between these instructions and so we use
|
||||
// a separate register to model them.
|
||||
// Status flags register
|
||||
def EFLAGS : X86Reg<"flags", 0>;
|
||||
|
||||
// The direction flag.
|
||||
def DF : X86Reg<"DF", 0>;
|
||||
|
||||
|
||||
// Segment registers
|
||||
def CS : X86Reg<"cs", 1>;
|
||||
def DS : X86Reg<"ds", 3>;
|
||||
@ -507,10 +497,6 @@ def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> {
|
||||
let CopyCost = -1; // Don't allow copying of status registers.
|
||||
let isAllocatable = 0;
|
||||
}
|
||||
def DFCCR : RegisterClass<"X86", [i32], 32, (add DF)> {
|
||||
let CopyCost = -1; // Don't allow copying of status registers.
|
||||
let isAllocatable = 0;
|
||||
}
|
||||
|
||||
// AVX-512 vector/mask registers.
|
||||
def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64],
|
||||
|
@ -608,10 +608,12 @@ def IIC_CMPXCHG_8B : InstrItinClass;
|
||||
def IIC_CMPXCHG_16B : InstrItinClass;
|
||||
def IIC_LODS : InstrItinClass;
|
||||
def IIC_OUTS : InstrItinClass;
|
||||
def IIC_CLC_CMC_STC : InstrItinClass;
|
||||
def IIC_CLC : InstrItinClass;
|
||||
def IIC_CLD : InstrItinClass;
|
||||
def IIC_CLI : InstrItinClass;
|
||||
def IIC_CMC : InstrItinClass;
|
||||
def IIC_CLTS : InstrItinClass;
|
||||
def IIC_STC : InstrItinClass;
|
||||
def IIC_STI : InstrItinClass;
|
||||
def IIC_STD : InstrItinClass;
|
||||
def IIC_XLAT : InstrItinClass;
|
||||
|
@ -514,10 +514,12 @@ def AtomItineraries : ProcessorItineraries<
|
||||
InstrItinData<IIC_CMPXCHG_16B, [InstrStage<22, [Port0, Port1]>] >,
|
||||
InstrItinData<IIC_LODS, [InstrStage<2, [Port0, Port1]>] >,
|
||||
InstrItinData<IIC_OUTS, [InstrStage<74, [Port0, Port1]>] >,
|
||||
InstrItinData<IIC_CLC_CMC_STC, [InstrStage<1, [Port0, Port1]>] >,
|
||||
InstrItinData<IIC_CLC, [InstrStage<1, [Port0, Port1]>] >,
|
||||
InstrItinData<IIC_CLD, [InstrStage<3, [Port0, Port1]>] >,
|
||||
InstrItinData<IIC_CLI, [InstrStage<14, [Port0, Port1]>] >,
|
||||
InstrItinData<IIC_CMC, [InstrStage<1, [Port0, Port1]>] >,
|
||||
InstrItinData<IIC_CLTS, [InstrStage<33, [Port0, Port1]>] >,
|
||||
InstrItinData<IIC_STC, [InstrStage<1, [Port0, Port1]>] >,
|
||||
InstrItinData<IIC_STI, [InstrStage<17, [Port0, Port1]>] >,
|
||||
InstrItinData<IIC_STD, [InstrStage<21, [Port0, Port1]>] >,
|
||||
InstrItinData<IIC_XLAT, [InstrStage<6, [Port0, Port1]>] >,
|
||||
|
@ -62,7 +62,6 @@ void initializeX86CallFrameOptimizationPass(PassRegistry &);
|
||||
void initializeX86CmovConverterPassPass(PassRegistry &);
|
||||
void initializeX86ExecutionDepsFixPass(PassRegistry &);
|
||||
void initializeX86DomainReassignmentPass(PassRegistry &);
|
||||
void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
@ -81,7 +80,6 @@ extern "C" void LLVMInitializeX86Target() {
|
||||
initializeX86CmovConverterPassPass(PR);
|
||||
initializeX86ExecutionDepsFixPass(PR);
|
||||
initializeX86DomainReassignmentPass(PR);
|
||||
initializeX86FlagsCopyLoweringPassPass(PR);
|
||||
}
|
||||
|
||||
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
|
||||
@ -417,7 +415,6 @@ void X86PassConfig::addPreRegAlloc() {
|
||||
addPass(createX86CallFrameOptimization());
|
||||
}
|
||||
|
||||
addPass(createX86FlagsCopyLoweringPass());
|
||||
addPass(createX86WinAllocaExpander());
|
||||
}
|
||||
void X86PassConfig::addMachineSSAOptimization() {
|
||||
|
@ -2559,8 +2559,6 @@ def mrtm : Flag<["-"], "mrtm">, Group<m_x86_Features_Group>;
|
||||
def mno_rtm : Flag<["-"], "mno-rtm">, Group<m_x86_Features_Group>;
|
||||
def mrdseed : Flag<["-"], "mrdseed">, Group<m_x86_Features_Group>;
|
||||
def mno_rdseed : Flag<["-"], "mno-rdseed">, Group<m_x86_Features_Group>;
|
||||
def msahf : Flag<["-"], "msahf">, Group<m_x86_Features_Group>;
|
||||
def mno_sahf : Flag<["-"], "mno-sahf">, Group<m_x86_Features_Group>;
|
||||
def msgx : Flag<["-"], "msgx">, Group<m_x86_Features_Group>;
|
||||
def mno_sgx : Flag<["-"], "mno-sgx">, Group<m_x86_Features_Group>;
|
||||
def msha : Flag<["-"], "msha">, Group<m_x86_Features_Group>;
|
||||
|
@ -198,7 +198,6 @@ bool X86TargetInfo::initFeatureMap(
|
||||
LLVM_FALLTHROUGH;
|
||||
case CK_Core2:
|
||||
setFeatureEnabledImpl(Features, "ssse3", true);
|
||||
setFeatureEnabledImpl(Features, "sahf", true);
|
||||
LLVM_FALLTHROUGH;
|
||||
case CK_Yonah:
|
||||
case CK_Prescott:
|
||||
@ -240,7 +239,6 @@ bool X86TargetInfo::initFeatureMap(
|
||||
setFeatureEnabledImpl(Features, "ssse3", true);
|
||||
setFeatureEnabledImpl(Features, "fxsr", true);
|
||||
setFeatureEnabledImpl(Features, "cx16", true);
|
||||
setFeatureEnabledImpl(Features, "sahf", true);
|
||||
break;
|
||||
|
||||
case CK_KNM:
|
||||
@ -271,7 +269,6 @@ bool X86TargetInfo::initFeatureMap(
|
||||
setFeatureEnabledImpl(Features, "xsaveopt", true);
|
||||
setFeatureEnabledImpl(Features, "xsave", true);
|
||||
setFeatureEnabledImpl(Features, "movbe", true);
|
||||
setFeatureEnabledImpl(Features, "sahf", true);
|
||||
break;
|
||||
|
||||
case CK_K6_2:
|
||||
@ -285,7 +282,6 @@ bool X86TargetInfo::initFeatureMap(
|
||||
setFeatureEnabledImpl(Features, "sse4a", true);
|
||||
setFeatureEnabledImpl(Features, "lzcnt", true);
|
||||
setFeatureEnabledImpl(Features, "popcnt", true);
|
||||
setFeatureEnabledImpl(Features, "sahf", true);
|
||||
LLVM_FALLTHROUGH;
|
||||
case CK_K8SSE3:
|
||||
setFeatureEnabledImpl(Features, "sse3", true);
|
||||
@ -319,7 +315,6 @@ bool X86TargetInfo::initFeatureMap(
|
||||
setFeatureEnabledImpl(Features, "prfchw", true);
|
||||
setFeatureEnabledImpl(Features, "cx16", true);
|
||||
setFeatureEnabledImpl(Features, "fxsr", true);
|
||||
setFeatureEnabledImpl(Features, "sahf", true);
|
||||
break;
|
||||
|
||||
case CK_ZNVER1:
|
||||
@ -343,7 +338,6 @@ bool X86TargetInfo::initFeatureMap(
|
||||
setFeatureEnabledImpl(Features, "prfchw", true);
|
||||
setFeatureEnabledImpl(Features, "rdrnd", true);
|
||||
setFeatureEnabledImpl(Features, "rdseed", true);
|
||||
setFeatureEnabledImpl(Features, "sahf", true);
|
||||
setFeatureEnabledImpl(Features, "sha", true);
|
||||
setFeatureEnabledImpl(Features, "sse4a", true);
|
||||
setFeatureEnabledImpl(Features, "xsave", true);
|
||||
@ -378,7 +372,6 @@ bool X86TargetInfo::initFeatureMap(
|
||||
setFeatureEnabledImpl(Features, "cx16", true);
|
||||
setFeatureEnabledImpl(Features, "fxsr", true);
|
||||
setFeatureEnabledImpl(Features, "xsave", true);
|
||||
setFeatureEnabledImpl(Features, "sahf", true);
|
||||
break;
|
||||
}
|
||||
if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec))
|
||||
@ -775,8 +768,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
|
||||
HasRetpoline = true;
|
||||
} else if (Feature == "+retpoline-external-thunk") {
|
||||
HasRetpolineExternalThunk = true;
|
||||
} else if (Feature == "+sahf") {
|
||||
HasLAHFSAHF = true;
|
||||
}
|
||||
|
||||
X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature)
|
||||
@ -1249,7 +1240,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
|
||||
.Case("rdrnd", true)
|
||||
.Case("rdseed", true)
|
||||
.Case("rtm", true)
|
||||
.Case("sahf", true)
|
||||
.Case("sgx", true)
|
||||
.Case("sha", true)
|
||||
.Case("shstk", true)
|
||||
@ -1323,7 +1313,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
|
||||
.Case("retpoline", HasRetpoline)
|
||||
.Case("retpoline-external-thunk", HasRetpolineExternalThunk)
|
||||
.Case("rtm", HasRTM)
|
||||
.Case("sahf", HasLAHFSAHF)
|
||||
.Case("sgx", HasSGX)
|
||||
.Case("sha", HasSHA)
|
||||
.Case("shstk", HasSHSTK)
|
||||
|
@ -98,7 +98,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
|
||||
bool HasPREFETCHWT1 = false;
|
||||
bool HasRetpoline = false;
|
||||
bool HasRetpolineExternalThunk = false;
|
||||
bool HasLAHFSAHF = false;
|
||||
|
||||
/// \brief Enumeration of all of the X86 CPUs supported by Clang.
|
||||
///
|
||||
|
@ -1,3 +1,3 @@
|
||||
/* $FreeBSD$ */
|
||||
|
||||
#define FREEBSD_CC_VERSION 1200012
|
||||
#define FREEBSD_CC_VERSION 1200013
|
||||
|
@ -1042,7 +1042,6 @@ SRCS_MIN+= Target/X86/X86FastISel.cpp
|
||||
SRCS_MIN+= Target/X86/X86FixupBWInsts.cpp
|
||||
SRCS_MIN+= Target/X86/X86FixupLEAs.cpp
|
||||
SRCS_MIN+= Target/X86/X86FixupSetCC.cpp
|
||||
SRCS_MIN+= Target/X86/X86FlagsCopyLowering.cpp
|
||||
SRCS_MIN+= Target/X86/X86FloatingPoint.cpp
|
||||
SRCS_MIN+= Target/X86/X86FrameLowering.cpp
|
||||
SRCS_MIN+= Target/X86/X86ISelDAGToDAG.cpp
|
||||
|
Loading…
Reference in New Issue
Block a user