Vendor import of llvm release_50 branch r311219:

https://llvm.org/svn/llvm-project/llvm/branches/release_50@311219
This commit is contained in:
Dimitry Andric 2017-08-20 21:02:43 +00:00
parent 4e20bb0468
commit 15c5c77fa0
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/vendor/llvm/dist/; revision=322727
svn path=/vendor/llvm/llvm-release_50-r311219/; revision=322728; tag=vendor/llvm/llvm-release_50-r311219
55 changed files with 1298 additions and 228 deletions

View File

@ -314,6 +314,7 @@ set(LLVM_CMAKE_PATH ${LLVM_MAIN_SRC_DIR}/cmake/modules)
set(LLVM_EXAMPLES_BINARY_DIR ${LLVM_BINARY_DIR}/examples)
set(LLVM_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/include)
# List of all targets to be built by default:
set(LLVM_ALL_TARGETS
AArch64
AMDGPU
@ -325,7 +326,6 @@ set(LLVM_ALL_TARGETS
MSP430
NVPTX
PowerPC
RISCV
Sparc
SystemZ
X86

View File

@ -5369,6 +5369,10 @@ The following behaviors are supported:
nodes. However, duplicate entries in the second list are dropped
during the append operation.
* - 7
- **Max**
Takes the max of the two values, which are required to be integers.
It is an error for a particular unique flag ID to have multiple behaviors,
except in the case of **Require** (which adds restrictions on another metadata
value) or **Override**.

View File

@ -117,6 +117,18 @@ Changes to the X86 Target
* Added support for AMD Lightweight Profiling (LWP) instructions.
* Avoid using slow LEA instructions.
* Use alternative sequences for multiply by constant.
* Improved lowering of strided shuffles.
* Improved the AVX512 cost model used by the vectorizer.
* Fix scalar code performance when AVX512 is enabled by making i1's illegal.
* Fixed many inline assembly bugs.
Changes to the AMDGPU Target
-----------------------------
@ -160,7 +172,29 @@ Changes to the C API
External Open Source Projects Using LLVM 5
==========================================
* A project...
Zig Programming Language
------------------------
`Zig <http://ziglang.org>`_ is an open-source programming language designed
for robustness, optimality, and clarity. It integrates closely with C and is
intended to eventually take the place of C. It uses LLVM to produce highly
optimized native code and to cross-compile for any target out of the box. Zig
is in alpha, with a beta release expected in September.
LDC - the LLVM-based D compiler
-------------------------------
`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It
pragmatically combines efficiency, control, and modeling power, with safety and
programmer productivity. D supports powerful concepts like Compile-Time Function
Execution (CTFE) and Template Meta-Programming, provides an innovative approach
to concurrency and offers many classical paradigms.
`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler
combined with LLVM as backend to produce efficient native code. LDC targets
x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on ARM
and PowerPC (32/64 bit). Ports to other architectures like AArch64 and MIPS64
are underway.
Additional Information

View File

@ -1220,8 +1220,9 @@ class SelectionDAG {
/// If an existing load has uses of its chain, create a token factor node with
/// that chain and the new memory node's chain and update users of the old
/// chain to the token factor. This ensures that the new memory node will have
/// the same relative memory dependency position as the old load.
void makeEquivalentMemoryOrdering(LoadSDNode *Old, SDValue New);
/// the same relative memory dependency position as the old load. Returns the
/// new merged load chain.
SDValue makeEquivalentMemoryOrdering(LoadSDNode *Old, SDValue New);
/// Topological-sort the AllNodes list and
/// assign a unique node id for each node in the DAG based on their

View File

@ -94,9 +94,9 @@ template <typename BaseLayerT> class LazyEmittingLayer {
llvm_unreachable("Invalid emit-state.");
}
void removeModuleFromBaseLayer(BaseLayerT &BaseLayer) {
if (EmitState != NotEmitted)
BaseLayer.removeModule(Handle);
Error removeModuleFromBaseLayer(BaseLayerT& BaseLayer) {
return EmitState != NotEmitted ? BaseLayer.removeModule(Handle)
: Error::success();
}
void emitAndFinalize(BaseLayerT &BaseLayer) {
@ -226,9 +226,9 @@ template <typename BaseLayerT> class LazyEmittingLayer {
/// This method will free the memory associated with the given module, both
/// in this layer, and the base layer.
Error removeModule(ModuleHandleT H) {
(*H)->removeModuleFromBaseLayer(BaseLayer);
Error Err = (*H)->removeModuleFromBaseLayer(BaseLayer);
ModuleList.erase(H);
return Error::success();
return Err;
}
/// @brief Search for the given named symbol.

View File

@ -73,6 +73,7 @@ class COFFImportFile : public SymbolicFile {
struct COFFShortExport {
std::string Name;
std::string ExtName;
std::string SymbolName;
uint16_t Ordinal = 0;
bool Noname = false;
@ -98,7 +99,8 @@ struct COFFShortExport {
std::error_code writeImportLibrary(StringRef ImportName,
StringRef Path,
ArrayRef<COFFShortExport> Exports,
COFF::MachineTypes Machine);
COFF::MachineTypes Machine,
bool MakeWeakAliases);
} // namespace object
} // namespace llvm

View File

@ -162,6 +162,11 @@ static cl::opt<unsigned>
cl::desc("Maximum depth of recursive SExt/ZExt"),
cl::init(8));
static cl::opt<unsigned>
MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden,
cl::desc("Max coefficients in AddRec during evolving"),
cl::init(16));
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
@ -2878,6 +2883,12 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
continue;
// Limit max number of arguments to avoid creation of unreasonably big
// SCEVAddRecs with very complex operands.
if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 >
MaxAddRecSize)
continue;
bool Overflow = false;
Type *Ty = AddRec->getType();
bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
@ -7582,6 +7593,25 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
if (const SCEVConstant *BTCC =
dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
// This trivial case can show up in some degenerate cases where
// the incoming IR has not yet been fully simplified.
if (BTCC->getValue()->isZero()) {
  // With a zero backedge-taken count, the value of the PHI is whatever flows
  // in from outside the loop -- but only if every out-of-loop incoming edge
  // carries the same value.
  Value *InitValue = nullptr;
  bool MultipleInitValues = false;
  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
    if (!LI->contains(PN->getIncomingBlock(i))) {
      if (!InitValue)
        InitValue = PN->getIncomingValue(i);
      else if (InitValue != PN->getIncomingValue(i)) {
        MultipleInitValues = true;
        break;
      }
    }
  }
  // Decide only after all incoming edges have been examined. Returning from
  // inside the loop would accept the first out-of-loop value without ever
  // comparing it against the remaining incoming values.
  if (!MultipleInitValues && InitValue)
    return getSCEV(InitValue);
}
// Okay, we know how many times the containing loop executes. If
// this is a constant evolving PHI node, get the final value at
// the specified iteration number.

View File

@ -4458,6 +4458,10 @@ Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
// Bail out when we hit the limit.
if (Depth == MaxDepth)
return None;
// A mismatch occurs when we compare a scalar cmp to a vector cmp, for example.
if (LHS->getType() != RHS->getType())
return None;

View File

@ -302,7 +302,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
SDValue Cond = GetScalarizedVector(N->getOperand(0));
SDValue Cond = N->getOperand(0);
EVT OpVT = Cond.getValueType();
SDLoc DL(N);
// The vselect result and true/value operands need scalarizing, but it's
// not a given that the Cond does. For instance, in AVX512 v1i1 is legal.
// See the similar logic in ScalarizeVecRes_VSETCC
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
Cond = GetScalarizedVector(Cond);
} else {
EVT VT = OpVT.getVectorElementType();
Cond = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
SDValue LHS = GetScalarizedVector(N->getOperand(1));
TargetLowering::BooleanContent ScalarBool =
TLI.getBooleanContents(false, false);

View File

@ -7262,22 +7262,23 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
AddDbgValue(I, ToNode, false);
}
void SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
SDValue NewMemOp) {
SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
SDValue NewMemOp) {
assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
if (!OldLoad->hasAnyUseOfValue(1))
return;
// The new memory operation must have the same position as the old load in
// terms of memory dependency. Create a TokenFactor for the old load and new
// memory operation and update uses of the old load's output chain to use that
// TokenFactor.
SDValue OldChain = SDValue(OldLoad, 1);
SDValue NewChain = SDValue(NewMemOp.getNode(), 1);
if (!OldLoad->hasAnyUseOfValue(1))
return NewChain;
SDValue TokenFactor =
getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other, OldChain, NewChain);
ReplaceAllUsesOfValueWith(OldChain, TokenFactor);
UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain);
return TokenFactor;
}
//===----------------------------------------------------------------------===//

View File

@ -180,6 +180,7 @@ class VirtRegRewriter : public MachineFunctionPass {
void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const;
void handleIdentityCopy(MachineInstr &MI) const;
void expandCopyBundle(MachineInstr &MI) const;
bool subRegLiveThrough(const MachineInstr &MI, unsigned SuperPhysReg) const;
public:
static char ID;
@ -415,6 +416,32 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
}
}
/// Check whether (part of) \p SuperPhysReg is live through \p MI.
/// \pre \p MI defines a subregister of a virtual register that
/// has been assigned to \p SuperPhysReg.
bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
unsigned SuperPhysReg) const {
SlotIndex MIIndex = LIS->getInstructionIndex(MI);
SlotIndex BeforeMIUses = MIIndex.getBaseIndex();
SlotIndex AfterMIDefs = MIIndex.getBoundaryIndex();
for (MCRegUnitIterator Unit(SuperPhysReg, TRI); Unit.isValid(); ++Unit) {
const LiveRange &UnitRange = LIS->getRegUnit(*Unit);
// If the regunit is live both before and after MI,
// we assume it is live through.
// Generally speaking, this is not true, because something like
// "RU = op RU" would match that description.
// However, we know that we are trying to assess whether
// a def of a virtual reg, vreg, is live at the same time of RU.
// If we are in the "RU = op RU" situation, that means that vreg
// is defined at the same time as RU (i.e., "vreg, RU = op RU").
// Thus, vreg and RU interferes and vreg cannot be assigned to
// SuperPhysReg. Therefore, this situation cannot happen.
if (UnitRange.liveAt(AfterMIDefs) && UnitRange.liveAt(BeforeMIUses))
return true;
}
return false;
}
void VirtRegRewriter::rewrite() {
bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
SmallVector<unsigned, 8> SuperDeads;
@ -452,7 +479,8 @@ void VirtRegRewriter::rewrite() {
// A virtual register kill refers to the whole register, so we may
// have to add <imp-use,kill> operands for the super-register. A
// partial redef always kills and redefines the super-register.
if (MO.readsReg() && (MO.isDef() || MO.isKill()))
if ((MO.readsReg() && (MO.isDef() || MO.isKill())) ||
(MO.isDef() && subRegLiveThrough(*MI, PhysReg)))
SuperKills.push_back(PhysReg);
if (MO.isDef()) {

View File

@ -134,13 +134,13 @@ dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName,
uint64_t StringOffset =
StrOffsetExt.getRelocatedValue(EntrySize, &Offset);
if (Format == DWARF32) {
OS << format("%8.8x ", StringOffset);
uint32_t StringOffset32 = (uint32_t)StringOffset;
OS << format("%8.8x ", StringOffset32);
const char *S = StrData.getCStr(&StringOffset32);
if (S)
OS << format("\"%s\"", S);
} else
OS << format("%16.16x ", StringOffset);
OS << format("%16.16" PRIx64 " ", StringOffset);
OS << "\n";
}
}

View File

@ -196,7 +196,7 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
++NumErrors;
OS << "error: DW_AT_stmt_list offset is beyond .debug_line "
"bounds: "
<< format("0x%08" PRIx32, *SectionOffset) << "\n";
<< format("0x%08" PRIx64, *SectionOffset) << "\n";
Die.dump(OS, 0);
OS << "\n";
}
@ -234,7 +234,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
if (CUOffset >= CUSize) {
++NumErrors;
OS << "error: " << FormEncodingString(Form) << " CU offset "
<< format("0x%08" PRIx32, CUOffset)
<< format("0x%08" PRIx64, CUOffset)
<< " is invalid (must be less than CU size of "
<< format("0x%08" PRIx32, CUSize) << "):\n";
Die.dump(OS, 0);
@ -366,7 +366,7 @@ void DWARFVerifier::verifyDebugLineRows() {
if (Row.Address < PrevAddress) {
++NumDebugLineErrors;
OS << "error: .debug_line["
<< format("0x%08" PRIx32,
<< format("0x%08" PRIx64,
*toSectionOffset(Die.find(DW_AT_stmt_list)))
<< "] row[" << RowIndex
<< "] decreases in address from previous row:\n";
@ -381,7 +381,7 @@ void DWARFVerifier::verifyDebugLineRows() {
if (Row.File > MaxFileIndex) {
++NumDebugLineErrors;
OS << "error: .debug_line["
<< format("0x%08" PRIx32,
<< format("0x%08" PRIx64,
*toSectionOffset(Die.find(DW_AT_stmt_list)))
<< "][" << RowIndex << "] has invalid file index " << Row.File
<< " (valid values are [1," << MaxFileIndex << "]):\n";

View File

@ -557,7 +557,7 @@ NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym,
std::error_code writeImportLibrary(StringRef ImportName, StringRef Path,
ArrayRef<COFFShortExport> Exports,
MachineTypes Machine) {
MachineTypes Machine, bool MakeWeakAliases) {
std::vector<NewArchiveMember> Members;
ObjectFactory OF(llvm::sys::path::filename(ImportName), Machine);
@ -575,7 +575,7 @@ std::error_code writeImportLibrary(StringRef ImportName, StringRef Path,
if (E.Private)
continue;
if (E.isWeak()) {
if (E.isWeak() && MakeWeakAliases) {
Members.push_back(OF.createWeakExternal(E.Name, E.ExtName, false));
Members.push_back(OF.createWeakExternal(E.Name, E.ExtName, true));
continue;
@ -587,7 +587,7 @@ std::error_code writeImportLibrary(StringRef ImportName, StringRef Path,
if (E.Constant)
ImportType = IMPORT_CONST;
StringRef SymbolName = E.isWeak() ? E.ExtName : E.Name;
StringRef SymbolName = E.SymbolName.empty() ? E.Name : E.SymbolName;
ImportNameType NameType = getNameType(SymbolName, E.Name, Machine);
Expected<std::string> Name = E.ExtName.empty()
? SymbolName

View File

@ -388,6 +388,10 @@ static unsigned isMatchingStore(MachineInstr &LoadInst,
}
static unsigned getPreIndexedOpcode(unsigned Opc) {
// FIXME: We don't currently support creating pre-indexed loads/stores when
// the load or store is the unscaled version. If we decide to perform such an
// optimization in the future the cases for the unscaled loads/stores will
// need to be added here.
switch (Opc) {
default:
llvm_unreachable("Opcode has no pre-indexed equivalent!");
@ -451,32 +455,42 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
default:
llvm_unreachable("Opcode has no post-indexed wise equivalent!");
case AArch64::STRSui:
case AArch64::STURSi:
return AArch64::STRSpost;
case AArch64::STRDui:
case AArch64::STURDi:
return AArch64::STRDpost;
case AArch64::STRQui:
case AArch64::STURQi:
return AArch64::STRQpost;
case AArch64::STRBBui:
return AArch64::STRBBpost;
case AArch64::STRHHui:
return AArch64::STRHHpost;
case AArch64::STRWui:
case AArch64::STURWi:
return AArch64::STRWpost;
case AArch64::STRXui:
case AArch64::STURXi:
return AArch64::STRXpost;
case AArch64::LDRSui:
case AArch64::LDURSi:
return AArch64::LDRSpost;
case AArch64::LDRDui:
case AArch64::LDURDi:
return AArch64::LDRDpost;
case AArch64::LDRQui:
case AArch64::LDURQi:
return AArch64::LDRQpost;
case AArch64::LDRBBui:
return AArch64::LDRBBpost;
case AArch64::LDRHHui:
return AArch64::LDRHHpost;
case AArch64::LDRWui:
case AArch64::LDURWi:
return AArch64::LDRWpost;
case AArch64::LDRXui:
case AArch64::LDURXi:
return AArch64::LDRXpost;
case AArch64::LDRSWui:
return AArch64::LDRSWpost;
@ -1694,8 +1708,9 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
++NumPostFolded;
break;
}
// Don't know how to handle pre/post-index versions, so move to the next
// instruction.
// Don't know how to handle unscaled pre/post-index versions below, so
// move to the next instruction.
if (TII->isUnscaledLdSt(Opc)) {
++MBBI;
break;

View File

@ -769,8 +769,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
const MachineOperand &Dest = MI.getOperand(0);
unsigned StatusReg = MI.getOperand(1).getReg();
bool StatusDead = MI.getOperand(1).isDead();
unsigned TempReg = MI.getOperand(1).getReg();
// Duplicating undef operands into 2 instructions does not guarantee the same
// value on both; However undef should be replaced by xzr anyway.
assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
@ -797,23 +796,9 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
}
// .Lloadcmp:
// mov wStatus, #0
// ldrex rDest, [rAddr]
// cmp rDest, rDesired
// bne .Ldone
if (!StatusDead) {
if (IsThumb) {
BuildMI(LoadCmpBB, DL, TII->get(ARM::tMOVi8), StatusReg)
.addDef(ARM::CPSR, RegState::Dead)
.addImm(0)
.add(predOps(ARMCC::AL));
} else {
BuildMI(LoadCmpBB, DL, TII->get(ARM::MOVi), StatusReg)
.addImm(0)
.add(predOps(ARMCC::AL))
.add(condCodeOp());
}
}
MachineInstrBuilder MIB;
MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg());
@ -836,10 +821,10 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
LoadCmpBB->addSuccessor(StoreBB);
// .Lstore:
// strex rStatus, rNew, [rAddr]
// cmp rStatus, #0
// strex rTempReg, rNew, [rAddr]
// cmp rTempReg, #0
// bne .Lloadcmp
MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg)
MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), TempReg)
.addReg(NewReg)
.addReg(AddrReg);
if (StrexOp == ARM::t2STREX)
@ -848,7 +833,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
BuildMI(StoreBB, DL, TII->get(CMPri))
.addReg(StatusReg, getKillRegState(StatusDead))
.addReg(TempReg, RegState::Kill)
.addImm(0)
.add(predOps(ARMCC::AL));
BuildMI(StoreBB, DL, TII->get(Bcc))
@ -904,8 +889,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
MachineOperand &Dest = MI.getOperand(0);
unsigned StatusReg = MI.getOperand(1).getReg();
bool StatusDead = MI.getOperand(1).isDead();
unsigned TempReg = MI.getOperand(1).getReg();
// Duplicating undef operands into 2 instructions does not guarantee the same
// value on both; However undef should be replaced by xzr anyway.
assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
@ -931,7 +915,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
// .Lloadcmp:
// ldrexd rDestLo, rDestHi, [rAddr]
// cmp rDestLo, rDesiredLo
// sbcs rStatus<dead>, rDestHi, rDesiredHi
// sbcs rTempReg<dead>, rDestHi, rDesiredHi
// bne .Ldone
unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD;
MachineInstrBuilder MIB;
@ -959,17 +943,17 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
LoadCmpBB->addSuccessor(StoreBB);
// .Lstore:
// strexd rStatus, rNewLo, rNewHi, [rAddr]
// cmp rStatus, #0
// strexd rTempReg, rNewLo, rNewHi, [rAddr]
// cmp rTempReg, #0
// bne .Lloadcmp
unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
MIB = BuildMI(StoreBB, DL, TII->get(STREXD), StatusReg);
MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg);
addExclusiveRegPair(MIB, New, 0, IsThumb, TRI);
MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
BuildMI(StoreBB, DL, TII->get(CMPri))
.addReg(StatusReg, getKillRegState(StatusDead))
.addReg(TempReg, RegState::Kill)
.addImm(0)
.add(predOps(ARMCC::AL));
BuildMI(StoreBB, DL, TII->get(Bcc))

View File

@ -6053,21 +6053,21 @@ def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
// significantly more naive than the standard expansion: we conservatively
// assume seq_cst, strong cmpxchg and omit clrex on failure.
let Constraints = "@earlyclobber $Rd,@earlyclobber $status",
let Constraints = "@earlyclobber $Rd,@earlyclobber $temp",
mayLoad = 1, mayStore = 1 in {
def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$status),
def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$temp),
(ins GPR:$addr, GPR:$desired, GPR:$new),
NoItinerary, []>, Sched<[]>;
def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$status),
def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$temp),
(ins GPR:$addr, GPR:$desired, GPR:$new),
NoItinerary, []>, Sched<[]>;
def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$status),
def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$temp),
(ins GPR:$addr, GPR:$desired, GPR:$new),
NoItinerary, []>, Sched<[]>;
def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$status),
def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$temp),
(ins GPR:$addr, GPRPair:$desired, GPRPair:$new),
NoItinerary, []>, Sched<[]>;
}

View File

@ -419,6 +419,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
}
// Custom action for SELECT MMX and expand action for SELECT_CC MMX
setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
// SjLj exception handling but a light-weight setjmp/longjmp replacement to
@ -1383,7 +1388,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// (result) is 256-bit but the source is 512-bit wide.
// 128-bit was made Custom under AVX1.
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
MVT::v8f32, MVT::v4f64 })
MVT::v8f32, MVT::v4f64, MVT::v1i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1,
MVT::v16i1, MVT::v32i1, MVT::v64i1 })
@ -14570,6 +14575,21 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
MVT ResVT = Op.getSimpleValueType();
// When v1i1 is legal a scalarization of a vselect with a vXi1 Cond
// would result in: v1i1 = extract_subvector(vXi1, idx).
// Lower these into extract_vector_elt which is already selectable.
if (ResVT == MVT::v1i1) {
assert(Subtarget.hasAVX512() &&
"Boolean EXTRACT_SUBVECTOR requires AVX512");
MVT EltVT = ResVT.getVectorElementType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT LegalVT =
(TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)).getSimpleVT();
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalVT, In, Idx);
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ResVT, Res);
}
assert((In.getSimpleValueType().is256BitVector() ||
In.getSimpleValueType().is512BitVector()) &&
"Can only extract from 256-bit or 512-bit vectors");
@ -20651,8 +20671,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
}
// ADC/ADCX/SBB
case ADX: {
SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::Other);
SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::i32);
SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2),
DAG.getConstant(-1, dl, MVT::i8));
SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3),
@ -30663,6 +30683,14 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return SDValue(N, 0);
}
// Custom action for SELECT MMX
if (VT == MVT::x86mmx) {
LHS = DAG.getBitcast(MVT::i64, LHS);
RHS = DAG.getBitcast(MVT::i64, RHS);
SDValue newSelect = DAG.getNode(ISD::SELECT, DL, MVT::i64, Cond, LHS, RHS);
return DAG.getBitcast(VT, newSelect);
}
return SDValue();
}
@ -33358,7 +33386,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->getAlignment(),
Ld->getMemOperand()->getFlags());
SDValue NewChain = NewLd.getValue(1);
// Make sure new load is placed in same chain order.
SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
if (TokenFactorIndex >= 0) {
Ops.push_back(NewChain);
NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
@ -33379,11 +33408,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
Ld->getPointerInfo().getWithOffset(4),
MinAlign(Ld->getAlignment(), 4),
Ld->getMemOperand()->getFlags());
// Make sure new loads are placed in same chain order.
SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, LoLd);
NewChain = DAG.makeEquivalentMemoryOrdering(Ld, HiLd);
SDValue NewChain = LoLd.getValue(1);
if (TokenFactorIndex >= 0) {
Ops.push_back(LoLd);
Ops.push_back(HiLd);
Ops.push_back(NewChain);
NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
}

View File

@ -978,6 +978,44 @@ multiclass avx512_int_broadcast_reg<bits<8> opc, X86VectorVTInfo _,
(_.VT (OpNode SrcRC:$src))>, T8PD, EVEX;
}
// Register-source vpbroadcast definition whose instruction operand is always
// GR32:$src, plus selection patterns for a (possibly narrower) source
// register class.
//   opc    - opcode byte forwarded to AVX512_maskable_custom.
//   Name   - base name used to look up the r / rk / rkz instruction records
//            referenced by the patterns below.
//   _      - vector type info supplying RC, KRCWM, VT, Suffix, ExeDomain and
//            ImmAllZerosV.
//   OpNode - DAG operator matched by the patterns.
//   SrcRC  - register class of the scalar being broadcast.
//   Subreg - subregister index at which SrcRC:$src is inserted into an i32.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name,
X86VectorVTInfo _, SDPatternOperator OpNode,
RegisterClass SrcRC, SubRegIndex Subreg> {
// The instruction itself carries no selection patterns (all pattern lists are
// empty); "$src0 = $dst" ties the pass-through operand to the destination.
let ExeDomain = _.ExeDomain in
defm r : AVX512_maskable_custom<opc, MRMSrcReg,
(outs _.RC:$dst), (ins GR32:$src),
!con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
!con((ins _.KRCWM:$mask), (ins GR32:$src)),
"vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
"$src0 = $dst">, T8PD, EVEX;
// Unmasked form: place SrcRC:$src into an otherwise undefined i32 via
// INSERT_SUBREG and feed that to the GR32-source instruction.
def : Pat <(_.VT (OpNode SrcRC:$src)),
(!cast<Instruction>(Name#r)
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Merge-masked form: vselect against the pass-through value $src0.
def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
(!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Zero-masked form: vselect against an all-zeros vector.
def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
(!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
(i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
// Instantiates avx512_int_broadcastbw_reg for each vector length described by
// the AVX512VLVectorVTInfo argument:
//   Z    - info512, EVEX_V512, guarded by prd only.
//   Z256 - info256, EVEX_V256, guarded by prd and HasVLX.
//   Z128 - info128, EVEX_V128, guarded by prd and HasVLX.
// Name is extended with the matching Z / Z256 / Z128 suffix so the inner
// multiclass can look up the instruction records it defines.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, _.info512, OpNode, SrcRC,
Subreg>, EVEX_V512;
// The 256-bit and 128-bit variants additionally require HasVLX.
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, _.info256, OpNode,
SrcRC, Subreg>, EVEX_V256;
defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, _.info128, OpNode,
SrcRC, Subreg>, EVEX_V128;
}
}
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
SDPatternOperator OpNode,
RegisterClass SrcRC, Predicate prd> {
@ -989,18 +1027,11 @@ multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
}
}
let isCodeGenOnly = 1 in {
defm VPBROADCASTBr : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info,
X86VBroadcast, GR8, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info,
X86VBroadcast, GR16, HasBWI>;
}
let isAsmParserOnly = 1 in {
defm VPBROADCASTBr_Alt : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info,
null_frag, GR32, HasBWI>;
defm VPBROADCASTWr_Alt : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info,
null_frag, GR32, HasBWI>;
}
defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,

View File

@ -60,11 +60,13 @@ std::vector<std::unique_ptr<MemoryBuffer>> OwningMBs;
// Opens a file. Path has to be resolved already.
// Newly created memory buffers are owned by this driver.
MemoryBufferRef openFile(StringRef Path) {
Optional<MemoryBufferRef> openFile(StringRef Path) {
ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MB = MemoryBuffer::getFile(Path);
if (std::error_code EC = MB.getError())
if (std::error_code EC = MB.getError()) {
llvm::errs() << "fail openFile: " << EC.message() << "\n";
return None;
}
MemoryBufferRef MBRef = MB.get()->getMemBufferRef();
OwningMBs.push_back(std::move(MB.get())); // take ownership
@ -114,11 +116,16 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
for (auto *Arg : Args.filtered(OPT_UNKNOWN))
llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n";
MemoryBufferRef MB;
if (auto *Arg = Args.getLastArg(OPT_d))
MB = openFile(Arg->getValue());
if (!Args.hasArg(OPT_d)) {
llvm::errs() << "no definition file specified\n";
return 1;
}
if (!MB.getBufferSize()) {
Optional<MemoryBufferRef> MB = openFile(Args.getLastArg(OPT_d)->getValue());
if (!MB)
return 1;
if (!MB->getBufferSize()) {
llvm::errs() << "definition file empty\n";
return 1;
}
@ -133,7 +140,7 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
}
Expected<COFFModuleDefinition> Def =
parseCOFFModuleDefinition(MB, Machine, true);
parseCOFFModuleDefinition(*MB, Machine, true);
if (!Def) {
llvm::errs() << "error parsing definition\n"
@ -154,7 +161,7 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
if (Path.empty())
Path = getImplibPath(Def->OutputFile);
if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine))
if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine, true))
return 1;
return 0;
}

View File

@ -1470,6 +1470,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
}
i = CS.arg_begin();
const unsigned ShadowArgStart = Args.size();
for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
Args.push_back(DFSF.getShadow(*i));
@ -1505,6 +1506,15 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
CustomCI->setCallingConv(CI->getCallingConv());
CustomCI->setAttributes(CI->getAttributes());
// Update the parameter attributes of the custom call instruction to
// zero extend the shadow parameters. This is required for targets
// which consider ShadowTy an illegal type.
for (unsigned n = 0; n < FT->getNumParams(); n++) {
const unsigned ArgNo = ShadowArgStart + n;
if (CustomCI->getArgOperand(ArgNo)->getType() == DFSF.DFS.ShadowTy)
CustomCI->addParamAttr(ArgNo, Attribute::ZExt);
}
if (!FT->getReturnType()->isVoidTy()) {
LoadInst *LabelLoad = IRB.CreateLoad(DFSF.LabelReturnAlloca);
DFSF.setShadow(CustomCI, LabelLoad);

View File

@ -15,6 +15,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/BDCE.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/DemandedBits.h"
@ -35,6 +36,46 @@ using namespace llvm;
STATISTIC(NumRemoved, "Number of instructions removed (unused)");
STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)");
/// If an instruction is trivialized (dead), then the chain of users of that
/// instruction may need to be cleared of assumptions that can no longer be
/// guaranteed correct.
///
/// Starting from the users of \p I, walks the def-use chain and drops the
/// poison-generating flags of every reached instruction whose bits are not
/// all demanded according to \p DB. Users with all bits demanded stop the
/// walk along that path.
static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
  assert(I->getType()->isIntegerTy() && "Trivializing a non-integer value?");

  // Instructions are recorded in Visited when they are first considered, not
  // when they are popped. This guarantees each instruction is examined and
  // queued at most once, which both breaks cycles and avoids re-walking
  // shared sub-graphs (e.g. diamonds in the use graph).
  SmallPtrSet<Instruction *, 16> Visited;
  SmallVector<Instruction *, 16> WorkList;

  auto EnqueueIfEligible = [&](User *U) {
    auto *UserInst = dyn_cast<Instruction>(U);
    if (!UserInst || !Visited.insert(UserInst).second)
      return;
    // Only integer-typed users are asked for their demanded bits.
    // NOTE(review): presumably DemandedBits reports "all bits demanded" for
    // other sized types and cannot size unsized (e.g. void) results, so
    // skipping them here keeps results unchanged -- confirm against
    // DemandedBits::getDemandedBits.
    if (!UserInst->getType()->isIntegerTy())
      return;
    // If all bits of a user are demanded, then we know that nothing below
    // that in the def-use chain needs to be changed.
    if (!DB.getDemandedBits(UserInst).isAllOnesValue())
      WorkList.push_back(UserInst);
  };

  // Initialize the worklist with eligible direct users.
  for (User *JU : I->users())
    EnqueueIfEligible(JU);

  // DFS through subsequent users.
  while (!WorkList.empty()) {
    Instruction *J = WorkList.pop_back_val();

    // NSW, NUW, and exact are based on operands that might have changed.
    J->dropPoisonGeneratingFlags();

    // We do not have to worry about llvm.assume or range metadata:
    // 1. llvm.assume demands its operand, so trivializing can't change it.
    // 2. range metadata only applies to memory accesses which demand all bits.

    for (User *KU : J->users())
      EnqueueIfEligible(KU);
  }
}
static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
SmallVector<Instruction*, 128> Worklist;
bool Changed = false;
@ -51,6 +92,9 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
// replacing all uses with something else. Then, if they don't need to
// remain live (because they have side effects, etc.) we can remove them.
DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n");
clearAssumptionsOfUsers(&I, DB);
// FIXME: In theory we could substitute undef here instead of zero.
// This should be reconsidered once we settle on the semantics of
// undef, poison, etc.

View File

@ -0,0 +1,33 @@
; RUN: opt -analyze -scalar-evolution -scalar-evolution-max-add-rec-size=3 < %s | FileCheck %s
; Show that we are able to avoid creation of huge SCEVs by capping the max
; AddRec size.
; %x1 multiplies two affine recurrences and is expected to be folded into a
; single three-operand AddRec. %x2 and %x3 multiply that AddRec by itself
; again; with the size cap given on the RUN line they are expected to stay
; plain products of AddRecs instead of being folded into larger ones.
define i32 @test_01(i32 %a, i32 %b) {
; CHECK-LABEL: Classifying expressions for: @test_01
; CHECK-NEXT: %iv = phi i32 [ %a, %entry ], [ %iv.next, %loop ]
; CHECK-NEXT: --> {%a,+,%b}<%loop> U: full-set S: full-set
; CHECK-NEXT: %iv.next = add i32 %iv, %b
; CHECK-NEXT: --> {(%a + %b),+,%b}<%loop> U: full-set S: full-set
; CHECK-NEXT: %x1 = mul i32 %iv, %iv.next
; CHECK-NEXT: --> {((%a + %b) * %a),+,(((2 * %a) + (2 * %b)) * %b),+,(2 * %b * %b)}<%loop> U: full-set S: full-set
; CHECK-NEXT: %x2 = mul i32 %x1, %x1
; CHECK-NEXT: --> ({((%a + %b) * %a),+,(((2 * %a) + (2 * %b)) * %b),+,(2 * %b * %b)}<%loop> * {((%a + %b) * %a),+,(((2 * %a) + (2 * %b)) * %b),+,(2 * %b * %b)}<%loop>) U: full-set S: full-set
; CHECK-NEXT: %x3 = mul i32 %x2, %x1
; CHECK-NEXT: --> ({((%a + %b) * %a),+,(((2 * %a) + (2 * %b)) * %b),+,(2 * %b * %b)}<%loop> * {((%a + %b) * %a),+,(((2 * %a) + (2 * %b)) * %b),+,(2 * %b * %b)}<%loop> * {((%a + %b) * %a),+,(((2 * %a) + (2 * %b)) * %b),+,(2 * %b * %b)}<%loop>) U: full-set S: full-set
entry:
br label %loop
loop:
%iv = phi i32 [ %a, %entry ], [ %iv.next, %loop ]
%iv.next = add i32 %iv, %b
%cond = icmp slt i32 %iv.next, 1000
br i1 %cond, label %loop, label %exit
exit:
%x1 = mul i32 %iv, %iv.next
%x2 = mul i32 %x1, %x1
%x3 = mul i32 %x2, %x1
ret i32 %x3
}

View File

@ -0,0 +1,115 @@
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s
---
# LDURSi from %x0 at offset 0 followed by a SUBXri that subtracts 4 from %x0;
# the expected output contains a post-indexed LDRSpost with offset -4.
# CHECK-LABEL: name: test_LDURSi_post
# CHECK: LDRSpost %x0, -4
name: test_LDURSi_post
body: |
bb.0.entry:
liveins: %x0
%s0 = LDURSi %x0, 0
%x0 = SUBXri %x0, 4, 0
RET_ReallyLR implicit %x0
...
# LDURDi from %x0 at offset 0 followed by a SUBXri that subtracts 4 from %x0;
# the expected output contains a post-indexed LDRDpost with offset -4.
# CHECK-LABEL: name: test_LDURDi_post
# CHECK: LDRDpost %x0, -4
name: test_LDURDi_post
body: |
bb.0.entry:
liveins: %x0
%d0 = LDURDi %x0, 0
%x0 = SUBXri %x0, 4, 0
RET_ReallyLR implicit %x0
...
# LDURQi from %x0 at offset 0 followed by a SUBXri that subtracts 4 from %x0;
# the expected output contains a post-indexed LDRQpost with offset -4.
# CHECK-LABEL: name: test_LDURQi_post
# CHECK: LDRQpost %x0, -4
name: test_LDURQi_post
body: |
bb.0.entry:
liveins: %x0
%q0 = LDURQi %x0, 0
%x0 = SUBXri %x0, 4, 0
RET_ReallyLR implicit %x0
...
# LDURWi from %x0 at offset 0 followed by a SUBXri that subtracts 4 from %x0;
# the expected output contains a post-indexed LDRWpost with offset -4.
# CHECK-LABEL: name: test_LDURWi_post
# CHECK: LDRWpost %x0, -4
name: test_LDURWi_post
body: |
bb.0.entry:
liveins: %x0
%w1 = LDURWi %x0, 0
%x0 = SUBXri %x0, 4, 0
RET_ReallyLR implicit %x0
...
# LDURXi from %x0 at offset 0 followed by a SUBXri that subtracts 4 from %x0;
# the expected output contains a post-indexed LDRXpost defining %x1, with
# offset -4.
# CHECK-LABEL: name: test_LDURXi_post
# CHECK: %x1 = LDRXpost %x0, -4
name: test_LDURXi_post
body: |
bb.0.entry:
liveins: %x0
%x1 = LDURXi %x0, 0
%x0 = SUBXri %x0, 4, 0
RET_ReallyLR implicit %x0
...
# %s0 is defined by FMOVS0 and stored with STURSi at offset 0 from %x0, then
# SUBXri subtracts 4 from %x0; the expected output contains a post-indexed
# STRSpost with offset -4.
# CHECK-LABEL: name: test_STURSi_post
# CHECK: STRSpost %s0, %x0, -4
name: test_STURSi_post
body: |
bb.0.entry:
liveins: %x0
%s0 = FMOVS0
STURSi %s0, %x0, 0
%x0 = SUBXri %x0, 4, 0
RET_ReallyLR implicit %x0
...
# %d0 is defined by FMOVD0 and stored with STURDi at offset 0 from %x0, then
# SUBXri subtracts 4 from %x0; the expected output contains a post-indexed
# STRDpost with offset -4.
# CHECK-LABEL: name: test_STURDi_post
# CHECK: STRDpost %d0, %x0, -4
name: test_STURDi_post
body: |
bb.0.entry:
liveins: %x0
%d0 = FMOVD0
STURDi %d0, %x0, 0
%x0 = SUBXri %x0, 4, 0
RET_ReallyLR implicit %x0
...
# %q0 is defined by MOVIv4i32 and stored with STURQi at offset 0 from %x0, then
# SUBXri subtracts 4 from %x0; the expected output contains a post-indexed
# STRQpost with offset -4.
# CHECK-LABEL: name: test_STURQi_post
# CHECK: STRQpost %q0, %x0, -4
name: test_STURQi_post
body: |
bb.0.entry:
liveins: %x0
%q0 = MOVIv4i32 0, 0
STURQi %q0, %x0, 0
%x0 = SUBXri %x0, 4, 0
RET_ReallyLR implicit %x0
...
# %wzr is stored with STURWi at offset 0 from %x0, then SUBXri subtracts 4 from
# %x0; the expected output contains a post-indexed STRWpost with offset -4.
# CHECK-LABEL: name: test_STURWi_post
# CHECK: STRWpost %wzr, %x0, -4
name: test_STURWi_post
body: |
bb.0.entry:
liveins: %x0
STURWi %wzr, %x0, 0
%x0 = SUBXri %x0, 4, 0
RET_ReallyLR implicit %x0
...
# %xzr is stored with STURXi at offset 0 from %x0, then SUBXri subtracts 4 from
# %x0; the expected output contains a post-indexed STRXpost with offset -4.
# CHECK-LABEL: name: test_STURXi_post
# CHECK: STRXpost %xzr, %x0, -4
name: test_STURXi_post
body: |
bb.0.entry:
liveins: %x0
STURXi %xzr, %x0, 0
%x0 = SUBXri %x0, 4, 0
RET_ReallyLR implicit %x0
...

View File

@ -10,11 +10,10 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
; CHECK: dmb ish
; CHECK: uxtb [[DESIRED:r[0-9]+]], [[DESIRED]]
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
; CHECK: mov{{s?}} [[STATUS:r[0-9]+]], #0
; CHECK: ldrexb [[OLD:r[0-9]+]], [r0]
; CHECK: cmp [[OLD]], [[DESIRED]]
; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
; CHECK: strexb [[STATUS]], r2, [r0]
; CHECK: strexb [[STATUS:r[0-9]+]], r2, [r0]
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
; CHECK: bne [[RETRY]]
; CHECK: [[DONE]]:
@ -30,11 +29,10 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind
; CHECK: dmb ish
; CHECK: uxth [[DESIRED:r[0-9]+]], [[DESIRED]]
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
; CHECK: mov{{s?}} [[STATUS:r[0-9]+]], #0
; CHECK: ldrexh [[OLD:r[0-9]+]], [r0]
; CHECK: cmp [[OLD]], [[DESIRED]]
; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
; CHECK: strexh [[STATUS]], r2, [r0]
; CHECK: strexh [[STATUS:r[0-9]+]], r2, [r0]
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
; CHECK: bne [[RETRY]]
; CHECK: [[DONE]]:
@ -50,11 +48,10 @@ define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind
; CHECK: dmb ish
; CHECK-NOT: uxt
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
; CHECK: mov{{s?}} [[STATUS:r[0-9]+]], #0
; CHECK: ldrex [[OLD:r[0-9]+]], [r0]
; CHECK: cmp [[OLD]], [[DESIRED]]
; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
; CHECK: strex [[STATUS]], r2, [r0]
; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0]
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
; CHECK: bne [[RETRY]]
; CHECK: [[DONE]]:

View File

@ -0,0 +1,84 @@
# RUN: llc -o - -mtriple=thumbv7--windows-gnu -run-pass=greedy -run-pass=virtregrewriter %s | FileCheck %s
--- |
; Empty IR stubs, one per machine function defined later in this file; each
; one only returns.
target datalayout = "e-m:w-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv7--windows-gnu"
define void @subregLiveThrough() { ret void }
define void @subregNotLiveThrough() { ret void }
define void @subregNotLiveThrough2() { ret void }
...
---
# Check that we properly recognize that r1 is live through
# the first subreg copy.
# That will materialize as an implicit use of the big register
# on that copy.
# PR34107.
#
# Both halves of the gprpair %0 are written here: gsub_0 from %r0 and then
# gsub_1 from %r1, before the return uses %0.
#
# CHECK-LABEL: name: subregLiveThrough
name: subregLiveThrough
tracksRegLiveness: true
registers:
- { id: 0, class: gprpair }
body: |
bb.0:
liveins: %r0, %r1
; That copy is being coalesced so we should use a KILL
; placeholder. If that's not a kill, that means we are probably
; not coalescing %0 and %r0_r1 and thus we are not testing
; the problematic code anymore.
;
; CHECK: %r0 = KILL %r0, implicit killed %r0_r1, implicit-def %r0_r1
; CHECK-NEXT: %r1 = KILL %r1, implicit killed %r0_r1
undef %0.gsub_0 = COPY %r0
%0.gsub_1 = COPY %r1
tBX_RET 14, _, implicit %0
...
---
# Check that we properly recognize that r1 is *not* live through
# the first subreg copy.
# Only the gsub_0 half of the gprpair %0 is written (from %r0) before the
# return uses %0.
# CHECK-LABEL: name: subregNotLiveThrough
name: subregNotLiveThrough
tracksRegLiveness: true
registers:
- { id: 0, class: gprpair }
body: |
bb.0:
liveins: %r0, %r1
; r1 is not live through so check we are not implicitly using
; the big register.
; CHECK: %r0 = KILL %r0, implicit-def %r0_r1
; CHECK-NEXT: tBX_RET
undef %0.gsub_0 = COPY %r0
tBX_RET 14, _, implicit %0
...
---
# Check that we properly recognize that r1 is *not* live through
# the first subreg copy. It is defined by this copy, but is not
# live through.
# The copy into gsub_0 carries an extra implicit-def of %r1, which is expected
# to be kept on the resulting KILL.
# CHECK-LABEL: name: subregNotLiveThrough2
name: subregNotLiveThrough2
tracksRegLiveness: true
registers:
- { id: 0, class: gprpair }
body: |
bb.0:
liveins: %r0, %r1
; r1 is not live through so check we are not implicitly using
; the big register.
; CHECK: %r0 = KILL %r0, implicit-def %r1, implicit-def %r0_r1
; CHECK-NEXT: tBX_RET
undef %0.gsub_0 = COPY %r0, implicit-def %r1
tBX_RET 14, _, implicit %0
...

View File

@ -75,3 +75,30 @@ define i8 @test_subborrow_u64(i8 %c, i64 %a, i64 %b, i8* %ptr) {
ret i8 %ret;
}
; Try a version with loads. Previously we crashed on this.
; Both i64 addends of the addcarryx.u64 call are loaded from memory, the
; carry-in is the constant 0, and the result pointer is %res cast to i8*. The
; returned carry byte is zero-extended to i32 and returned.
define i32 @load_crash(i64* nocapture readonly %a, i64* nocapture readonly %b, i64* %res) {
; CHECK-LABEL: load_crash
; CHECK: addb
; ADX: adcxq
; CHECK: setb
; CHECK: retq
%1 = load i64, i64* %a, align 8
%2 = load i64, i64* %b, align 8
%3 = bitcast i64* %res to i8*
%4 = tail call i8 @llvm.x86.addcarryx.u64(i8 0, i64 %1, i64 %2, i8* %3)
%conv = zext i8 %4 to i32
ret i32 %conv
}
; Try a really simple all zero input case, which also used to crash.
; Every operand of the addcarryx.u64 call is a constant (carry-in 0, both
; addends 0, null result pointer) and the returned carry byte is unused.
define void @allzeros() {
; CHECK-LABEL: allzeros
; CHECK: xorl
; CHECK: addb
; CHECK: sbbq
; CHECK: andl
; CHECK: retq
entry:
%0 = tail call i8 @llvm.x86.addcarryx.u64(i8 0, i64 0, i64 0, i8* null)
ret void
}

View File

@ -1921,9 +1921,9 @@ define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8>
; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rsi, %k1
; AVX512BW-NEXT: vpbroadcastb %dil, %zmm0 {%k1}
; AVX512BW-NEXT: vpbroadcastb %dil, %zmm1 {%k1} {z}
; AVX512BW-NEXT: vpbroadcastb %dil, %zmm2
; AVX512BW-NEXT: vpbroadcastb %edi, %zmm1 {%k1} {z}
; AVX512BW-NEXT: vpbroadcastb %edi, %zmm0 {%k1}
; AVX512BW-NEXT: vpbroadcastb %edi, %zmm2
; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
@ -1934,9 +1934,9 @@ define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8>
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpbroadcastb %al, %zmm1 {%k1} {z}
; AVX512F-32-NEXT: vpbroadcastb %al, %zmm0 {%k1}
; AVX512F-32-NEXT: vpbroadcastb %al, %zmm2
; AVX512F-32-NEXT: vpbroadcastb %eax, %zmm1 {%k1} {z}
; AVX512F-32-NEXT: vpbroadcastb %eax, %zmm0 {%k1}
; AVX512F-32-NEXT: vpbroadcastb %eax, %zmm2
; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
@ -1954,20 +1954,20 @@ define <32 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i
; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpbroadcastw %di, %zmm0 {%k1}
; AVX512BW-NEXT: vpbroadcastw %di, %zmm1 {%k1} {z}
; AVX512BW-NEXT: vpbroadcastw %di, %zmm2
; AVX512BW-NEXT: vpbroadcastw %edi, %zmm1 {%k1} {z}
; AVX512BW-NEXT: vpbroadcastw %edi, %zmm0 {%k1}
; AVX512BW-NEXT: vpbroadcastw %edi, %zmm2
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm0 {%k1}
; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm1 {%k1} {z}
; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm2
; AVX512F-32-NEXT: movw {{[0-9]+}}(%esp), %ax
; AVX512F-32-NEXT: vpbroadcastw %eax, %zmm1 {%k1} {z}
; AVX512F-32-NEXT: vpbroadcastw %eax, %zmm0 {%k1}
; AVX512F-32-NEXT: vpbroadcastw %eax, %zmm2
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl

View File

@ -2799,9 +2799,9 @@ define <32 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8>
; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpbroadcastb %dil, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7]
; CHECK-NEXT: vpbroadcastb %dil, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xcf]
; CHECK-NEXT: vpbroadcastb %dil, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xd7]
; CHECK-NEXT: vpbroadcastb %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xcf]
; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7]
; CHECK-NEXT: vpbroadcastb %edi, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xd7]
; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
@ -2819,9 +2819,9 @@ define <16 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8>
; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpbroadcastb %dil, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xcf]
; CHECK-NEXT: vpbroadcastb %dil, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7]
; CHECK-NEXT: vpbroadcastb %dil, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xd7]
; CHECK-NEXT: vpbroadcastb %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xcf]
; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7]
; CHECK-NEXT: vpbroadcastb %edi, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xd7]
; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
@ -2839,9 +2839,9 @@ define <16 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i
; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpbroadcastw %di, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xcf]
; CHECK-NEXT: vpbroadcastw %di, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7]
; CHECK-NEXT: vpbroadcastw %di, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xd7]
; CHECK-NEXT: vpbroadcastw %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xcf]
; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7]
; CHECK-NEXT: vpbroadcastw %edi, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xd7]
; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
@ -2859,9 +2859,9 @@ define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16
; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
; CHECK-NEXT: vpbroadcastw %di, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xcf]
; CHECK-NEXT: vpbroadcastw %di, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7]
; CHECK-NEXT: vpbroadcastw %di, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xd7]
; CHECK-NEXT: vpbroadcastw %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xcf]
; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7]
; CHECK-NEXT: vpbroadcastw %edi, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xd7]
; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]

View File

@ -0,0 +1,92 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mattr=+avx512f | FileCheck %s --check-prefix=KNL
; RUN: llc < %s -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefix=SKX
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Per-lane select driven by the <4 x i1> mask %m between an x86_fp80 constant
; (materialized with fld1 in the expected output) and zero; the resulting
; <4 x x86_fp80> vector is stored through %p. The two RUN configurations
; (avx512f alone, and avx512f plus vl/bw/dq) have separate expectations.
define void @test(<4 x i1> %m, <4 x x86_fp80> %v, <4 x x86_fp80>*%p) local_unnamed_addr {
; KNL-LABEL: test:
; KNL: # BB#0: # %bb
; KNL-NEXT: vpextrb $0, %xmm0, %eax
; KNL-NEXT: testb $1, %al
; KNL-NEXT: fld1
; KNL-NEXT: fldz
; KNL-NEXT: fld %st(0)
; KNL-NEXT: fcmovne %st(2), %st(0)
; KNL-NEXT: vpextrb $4, %xmm0, %eax
; KNL-NEXT: testb $1, %al
; KNL-NEXT: fld %st(1)
; KNL-NEXT: fcmovne %st(3), %st(0)
; KNL-NEXT: vpextrb $8, %xmm0, %eax
; KNL-NEXT: testb $1, %al
; KNL-NEXT: fld %st(2)
; KNL-NEXT: fcmovne %st(4), %st(0)
; KNL-NEXT: vpextrb $12, %xmm0, %eax
; KNL-NEXT: testb $1, %al
; KNL-NEXT: fxch %st(3)
; KNL-NEXT: fcmovne %st(4), %st(0)
; KNL-NEXT: fstp %st(4)
; KNL-NEXT: fxch %st(3)
; KNL-NEXT: fstpt 30(%rdi)
; KNL-NEXT: fxch %st(1)
; KNL-NEXT: fstpt 20(%rdi)
; KNL-NEXT: fxch %st(1)
; KNL-NEXT: fstpt 10(%rdi)
; KNL-NEXT: fstpt (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: test:
; SKX: # BB#0: # %bb
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0
; SKX-NEXT: kshiftrw $2, %k0, %k1
; SKX-NEXT: kshiftlw $15, %k1, %k2
; SKX-NEXT: kshiftrw $15, %k2, %k2
; SKX-NEXT: kshiftlw $15, %k2, %k2
; SKX-NEXT: kshiftrw $15, %k2, %k2
; SKX-NEXT: kmovd %k2, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: fld1
; SKX-NEXT: fldz
; SKX-NEXT: fld %st(0)
; SKX-NEXT: fcmovne %st(2), %st(0)
; SKX-NEXT: kshiftlw $14, %k1, %k1
; SKX-NEXT: kshiftrw $15, %k1, %k1
; SKX-NEXT: kshiftlw $15, %k1, %k1
; SKX-NEXT: kshiftrw $15, %k1, %k1
; SKX-NEXT: kmovd %k1, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: fld %st(1)
; SKX-NEXT: fcmovne %st(3), %st(0)
; SKX-NEXT: kshiftlw $15, %k0, %k1
; SKX-NEXT: kshiftrw $15, %k1, %k1
; SKX-NEXT: kshiftlw $15, %k1, %k1
; SKX-NEXT: kshiftrw $15, %k1, %k1
; SKX-NEXT: kmovd %k1, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: fld %st(2)
; SKX-NEXT: fcmovne %st(4), %st(0)
; SKX-NEXT: kshiftlw $14, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kshiftlw $15, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: fxch %st(3)
; SKX-NEXT: fcmovne %st(4), %st(0)
; SKX-NEXT: fstp %st(4)
; SKX-NEXT: fxch %st(3)
; SKX-NEXT: fstpt 10(%rdi)
; SKX-NEXT: fxch %st(1)
; SKX-NEXT: fstpt (%rdi)
; SKX-NEXT: fxch %st(1)
; SKX-NEXT: fstpt 30(%rdi)
; SKX-NEXT: fstpt 20(%rdi)
; SKX-NEXT: retq
bb:
%tmp = select <4 x i1> %m, <4 x x86_fp80> <x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000>, <4 x x86_fp80> zeroinitializer
store <4 x x86_fp80> %tmp, <4 x x86_fp80>* %p, align 16
ret void
}

View File

@ -0,0 +1,46 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mcpu=pentium4 | FileCheck %s
%struct.Foo = type { i32, %struct.Bar }
%struct.Bar = type { i32, %struct.Buffer, i32 }
%struct.Buffer = type { i8*, i32 }
; This test checks that the load feeding the store of %2 is not dropped.
; %2 is an i64 loaded from inside %foo after a 20-byte memset of %foo to 0; a
; second 20-byte memset to -51 (0xCD) follows, and %2 is then stored back to
; the same address.
;
define i32 @pr34088() local_unnamed_addr {
; CHECK-LABEL: pr34088:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: .Lcfi0:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi1:
; CHECK-NEXT: .cfi_offset %ebp, -8
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: .Lcfi2:
; CHECK-NEXT: .cfi_def_cfa_register %ebp
; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: subl $32, %esp
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: movaps {{.*#+}} xmm1 = [205,205,205,205,205,205,205,205,205,205,205,205,205,205,205,205]
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movaps %xmm0, (%esp)
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movaps %xmm1, (%esp)
; CHECK-NEXT: movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %ebp, %esp
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl
entry:
%foo = alloca %struct.Foo, align 4
%0 = bitcast %struct.Foo* %foo to i8*
call void @llvm.memset.p0i8.i32(i8* nonnull %0, i8 0, i32 20, i32 4, i1 false)
%buffer1 = getelementptr inbounds %struct.Foo, %struct.Foo* %foo, i32 0, i32 1, i32 1
%1 = bitcast %struct.Buffer* %buffer1 to i64*
%2 = load i64, i64* %1, align 4
call void @llvm.memset.p0i8.i32(i8* nonnull %0, i8 -51, i32 20, i32 4, i1 false)
store i64 %2, i64* %1, align 4
ret i32 0
}
declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1)

View File

@ -0,0 +1,120 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+mmx < %s | FileCheck %s --check-prefix=X64
; RUN: llc -mtriple=i686-unknown-unknown -mattr=+mmx < %s | FileCheck %s --check-prefix=I32
; From source: clang -O2
;__m64 test47(int a)
;{
; __m64 x = (a)? (__m64)(7): (__m64)(0);
; return __builtin_ia32_psllw(x, x);
;}
; In the IR below the select yields the x86_mmx constant 7 when %arg is zero
; and 0 otherwise; the selected value is used as both operands of psll.w.
; NOTE(review): that is the opposite polarity of the C snippet above, and %arg
; is an i64 rather than an int - presumably the IR was hand-edited; confirm.
define i64 @test47(i64 %arg) {
;
; X64-LABEL: test47:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: testq %rdi, %rdi
; X64-NEXT: movl $7, %ecx
; X64-NEXT: cmoveq %rcx, %rax
; X64-NEXT: movd %rax, %mm0
; X64-NEXT: psllw %mm0, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
;
; I32-LABEL: test47:
; I32: # BB#0:
; I32-NEXT: pushl %ebp
; I32-NEXT: .Lcfi0:
; I32-NEXT: .cfi_def_cfa_offset 8
; I32-NEXT: .Lcfi1:
; I32-NEXT: .cfi_offset %ebp, -8
; I32-NEXT: movl %esp, %ebp
; I32-NEXT: .Lcfi2:
; I32-NEXT: .cfi_def_cfa_register %ebp
; I32-NEXT: andl $-8, %esp
; I32-NEXT: subl $16, %esp
; I32-NEXT: movl 8(%ebp), %eax
; I32-NEXT: orl 12(%ebp), %eax
; I32-NEXT: movl $7, %eax
; I32-NEXT: je .LBB0_2
; I32-NEXT: # BB#1:
; I32-NEXT: xorl %eax, %eax
; I32-NEXT: .LBB0_2:
; I32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; I32-NEXT: movl $0, {{[0-9]+}}(%esp)
; I32-NEXT: movq {{[0-9]+}}(%esp), %mm0
; I32-NEXT: psllw %mm0, %mm0
; I32-NEXT: movq %mm0, (%esp)
; I32-NEXT: movl (%esp), %eax
; I32-NEXT: movl {{[0-9]+}}(%esp), %edx
; I32-NEXT: movl %ebp, %esp
; I32-NEXT: popl %ebp
; I32-NEXT: retl
%cond = icmp eq i64 %arg, 0
%slct = select i1 %cond, x86_mmx bitcast (i64 7 to x86_mmx), x86_mmx bitcast (i64 0 to x86_mmx)
%psll = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %slct, x86_mmx %slct)
%retc = bitcast x86_mmx %psll to i64
ret i64 %retc
}
; From source: clang -O2
;__m64 test49(int a, long long n, long long m)
;{
; __m64 x = (a)? (__m64)(n): (__m64)(m);
; return __builtin_ia32_psllw(x, x);
;}
; In the IR below %x and %y are bitcast to x86_mmx and the select yields %x
; when %arg is zero and %y otherwise; the selected value is used as both
; operands of psll.w.
; NOTE(review): that is the opposite polarity of the C snippet above, and %arg
; is an i64 rather than an int - presumably the IR was hand-edited; confirm.
define i64 @test49(i64 %arg, i64 %x, i64 %y) {
;
; X64-LABEL: test49:
; X64: # BB#0:
; X64-NEXT: testq %rdi, %rdi
; X64-NEXT: cmovneq %rdx, %rsi
; X64-NEXT: movd %rsi, %mm0
; X64-NEXT: psllw %mm0, %mm0
; X64-NEXT: movd %mm0, %rax
; X64-NEXT: retq
;
; I32-LABEL: test49:
; I32: # BB#0:
; I32-NEXT: pushl %ebp
; I32-NEXT: .Lcfi3:
; I32-NEXT: .cfi_def_cfa_offset 8
; I32-NEXT: .Lcfi4:
; I32-NEXT: .cfi_offset %ebp, -8
; I32-NEXT: movl %esp, %ebp
; I32-NEXT: .Lcfi5:
; I32-NEXT: .cfi_def_cfa_register %ebp
; I32-NEXT: andl $-8, %esp
; I32-NEXT: subl $8, %esp
; I32-NEXT: movl 8(%ebp), %eax
; I32-NEXT: orl 12(%ebp), %eax
; I32-NEXT: je .LBB1_1
; I32-NEXT: # BB#2:
; I32-NEXT: leal 24(%ebp), %eax
; I32-NEXT: jmp .LBB1_3
; I32-NEXT: .LBB1_1:
; I32-NEXT: leal 16(%ebp), %eax
; I32-NEXT: .LBB1_3:
; I32-NEXT: movq (%eax), %mm0
; I32-NEXT: psllw %mm0, %mm0
; I32-NEXT: movq %mm0, (%esp)
; I32-NEXT: movl (%esp), %eax
; I32-NEXT: movl {{[0-9]+}}(%esp), %edx
; I32-NEXT: movl %ebp, %esp
; I32-NEXT: popl %ebp
; I32-NEXT: retl
%cond = icmp eq i64 %arg, 0
%xmmx = bitcast i64 %x to x86_mmx
%ymmx = bitcast i64 %y to x86_mmx
%slct = select i1 %cond, x86_mmx %xmmx, x86_mmx %ymmx
%psll = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %slct, x86_mmx %slct)
%retc = bitcast x86_mmx %psll to i64
ret i64 %retc
}
declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx)

View File

@ -1643,7 +1643,7 @@ define <16 x i8> @insert_dup_elt1_mem_v16i8_sext_i8(i8* %ptr) {
; AVX512VL: # BB#0:
; AVX512VL-NEXT: movsbl (%rdi), %eax
; AVX512VL-NEXT: shrl $8, %eax
; AVX512VL-NEXT: vpbroadcastb %al, %xmm0
; AVX512VL-NEXT: vpbroadcastb %eax, %xmm0
; AVX512VL-NEXT: retq
%tmp = load i8, i8* %ptr, align 1
%tmp1 = sext i8 %tmp to i32
@ -1696,7 +1696,7 @@ define <16 x i8> @insert_dup_elt2_mem_v16i8_sext_i8(i8* %ptr) {
; AVX512VL: # BB#0:
; AVX512VL-NEXT: movsbl (%rdi), %eax
; AVX512VL-NEXT: shrl $16, %eax
; AVX512VL-NEXT: vpbroadcastb %al, %xmm0
; AVX512VL-NEXT: vpbroadcastb %eax, %xmm0
; AVX512VL-NEXT: retq
%tmp = load i8, i8* %ptr, align 1
%tmp1 = sext i8 %tmp to i32

View File

@ -2274,7 +2274,7 @@ define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
; AVX512VL-LABEL: insert_dup_mem_v8i16_sext_i16:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: movswl (%rdi), %eax
; AVX512VL-NEXT: vpbroadcastw %ax, %xmm0
; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
; AVX512VL-NEXT: retq
%tmp = load i16, i16* %ptr, align 2
%tmp1 = sext i16 %tmp to i32
@ -2390,7 +2390,7 @@ define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
; AVX512VL: # BB#0:
; AVX512VL-NEXT: movswl (%rdi), %eax
; AVX512VL-NEXT: shrl $16, %eax
; AVX512VL-NEXT: vpbroadcastw %ax, %xmm0
; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
; AVX512VL-NEXT: retq
%tmp = load i16, i16* %ptr, align 2
%tmp1 = sext i16 %tmp to i32
@ -2443,7 +2443,7 @@ define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
; AVX512VL: # BB#0:
; AVX512VL-NEXT: movswl (%rdi), %eax
; AVX512VL-NEXT: shrl $16, %eax
; AVX512VL-NEXT: vpbroadcastw %ax, %xmm0
; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
; AVX512VL-NEXT: retq
%tmp = load i16, i16* %ptr, align 2
%tmp1 = sext i16 %tmp to i32

View File

@ -4069,7 +4069,7 @@ define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) {
; AVX512VL-LABEL: insert_dup_mem_v16i16_sext_i16:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: movswl (%rdi), %eax
; AVX512VL-NEXT: vpbroadcastw %ax, %ymm0
; AVX512VL-NEXT: vpbroadcastw %eax, %ymm0
; AVX512VL-NEXT: retq
%tmp = load i16, i16* %ptr, align 2
%tmp1 = sext i16 %tmp to i32

View File

@ -2431,7 +2431,7 @@ define <32 x i8> @insert_dup_elt1_mem_v32i8_sext_i8(i8* %ptr) {
; AVX512VL: # BB#0:
; AVX512VL-NEXT: movsbl (%rdi), %eax
; AVX512VL-NEXT: shrl $8, %eax
; AVX512VL-NEXT: vpbroadcastb %al, %ymm0
; AVX512VL-NEXT: vpbroadcastb %eax, %ymm0
; AVX512VL-NEXT: retq
%tmp = load i8, i8* %ptr, align 1
%tmp1 = sext i8 %tmp to i32

View File

@ -228,7 +228,7 @@ define <32 x i16> @insert_dup_mem_v32i16_i32(i32* %ptr) {
; SKX-LABEL: insert_dup_mem_v32i16_i32:
; SKX: ## BB#0:
; SKX-NEXT: movl (%rdi), %eax
; SKX-NEXT: vpbroadcastw %ax, %zmm0
; SKX-NEXT: vpbroadcastw %eax, %zmm0
; SKX-NEXT: retq
%tmp = load i32, i32* %ptr, align 4
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
@ -249,7 +249,7 @@ define <32 x i16> @insert_dup_mem_v32i16_sext_i16(i16* %ptr) {
; SKX-LABEL: insert_dup_mem_v32i16_sext_i16:
; SKX: ## BB#0:
; SKX-NEXT: movswl (%rdi), %eax
; SKX-NEXT: vpbroadcastw %ax, %zmm0
; SKX-NEXT: vpbroadcastw %eax, %zmm0
; SKX-NEXT: retq
%tmp = load i16, i16* %ptr, align 2
%tmp1 = sext i16 %tmp to i32
@ -269,7 +269,7 @@ define <32 x i16> @insert_dup_elt1_mem_v32i16_i32(i32* %ptr) #0 {
; SKX-LABEL: insert_dup_elt1_mem_v32i16_i32:
; SKX: ## BB#0:
; SKX-NEXT: movzwl 2(%rdi), %eax
; SKX-NEXT: vpbroadcastw %ax, %zmm0
; SKX-NEXT: vpbroadcastw %eax, %zmm0
; SKX-NEXT: retq
%tmp = load i32, i32* %ptr, align 4
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
@ -288,7 +288,7 @@ define <32 x i16> @insert_dup_elt3_mem_v32i16_i32(i32* %ptr) #0 {
; SKX-LABEL: insert_dup_elt3_mem_v32i16_i32:
; SKX: ## BB#0:
; SKX-NEXT: movzwl 2(%rdi), %eax
; SKX-NEXT: vpbroadcastw %ax, %zmm0
; SKX-NEXT: vpbroadcastw %eax, %zmm0
; SKX-NEXT: retq
%tmp = load i32, i32* %ptr, align 4
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1

View File

@ -332,7 +332,7 @@ define <64 x i8> @insert_dup_elt1_mem_v64i8_sext_i8(i8* %ptr) {
; AVX512BW: # BB#0:
; AVX512BW-NEXT: movsbl (%rdi), %eax
; AVX512BW-NEXT: shrl $8, %eax
; AVX512BW-NEXT: vpbroadcastb %al, %zmm0
; AVX512BW-NEXT: vpbroadcastb %eax, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: insert_dup_elt1_mem_v64i8_sext_i8:
@ -348,7 +348,7 @@ define <64 x i8> @insert_dup_elt1_mem_v64i8_sext_i8(i8* %ptr) {
; AVX512VBMI: # BB#0:
; AVX512VBMI-NEXT: movsbl (%rdi), %eax
; AVX512VBMI-NEXT: shrl $8, %eax
; AVX512VBMI-NEXT: vpbroadcastb %al, %zmm0
; AVX512VBMI-NEXT: vpbroadcastb %eax, %zmm0
; AVX512VBMI-NEXT: retq
%tmp = load i8, i8* %ptr, align 1
%tmp1 = sext i8 %tmp to i32

View File

@ -0,0 +1,8 @@
fun:dfsan_get_label=uninstrumented
fun:dfsan_get_label=custom
fun:k2=uninstrumented
fun:k2=custom
fun:k4=uninstrumented
fun:k4=custom

View File

@ -47,13 +47,13 @@ define void @f(i32 %x) {
; CHECK: %[[LABELVA1:.*]] = alloca [2 x i16]
; CHECK: %[[LABELRETURN:.*]] = alloca i16
; CHECK: call void @__dfsw_custom1(i32 1, i32 2, i16 0, i16 0)
; CHECK: call void @__dfsw_custom1(i32 1, i32 2, i16 zeroext 0, i16 zeroext 0)
call void @custom1(i32 1, i32 2)
; CHECK: call i32 @__dfsw_custom2(i32 1, i32 2, i16 0, i16 0, i16* %[[LABELRETURN]])
; CHECK: call i32 @__dfsw_custom2(i32 1, i32 2, i16 zeroext 0, i16 zeroext 0, i16* %[[LABELRETURN]])
call i32 @custom2(i32 1, i32 2)
; CHECK: call void @__dfsw_customcb({{.*}} @"dfst0$customcb", i8* bitcast ({{.*}} @"dfs$cb" to i8*), i16 0)
; CHECK: call void @__dfsw_customcb({{.*}} @"dfst0$customcb", i8* bitcast ({{.*}} @"dfs$cb" to i8*), i16 zeroext 0)
call void @customcb(i32 (i32)* @cb)
; CHECK: %[[LABELVA1_0:.*]] = getelementptr inbounds [2 x i16], [2 x i16]* %[[LABELVA1]], i32 0, i32 0
@ -61,12 +61,12 @@ define void @f(i32 %x) {
; CHECK: %[[LABELVA1_1:.*]] = getelementptr inbounds [2 x i16], [2 x i16]* %[[LABELVA1]], i32 0, i32 1
; CHECK: store i16 %{{.*}}, i16* %[[LABELVA1_1]]
; CHECK: %[[LABELVA1_0A:.*]] = getelementptr inbounds [2 x i16], [2 x i16]* %[[LABELVA1]], i32 0, i32 0
; CHECK: call void (i32, i16, i16*, ...) @__dfsw_custom3(i32 1, i16 0, i16* %[[LABELVA1_0A]], i32 2, i32 %{{.*}})
; CHECK: call void (i32, i16, i16*, ...) @__dfsw_custom3(i32 1, i16 zeroext 0, i16* %[[LABELVA1_0A]], i32 2, i32 %{{.*}})
call void (i32, ...) @custom3(i32 1, i32 2, i32 %x)
; CHECK: %[[LABELVA2_0:.*]] = getelementptr inbounds [2 x i16], [2 x i16]* %[[LABELVA2]], i32 0, i32 0
; CHECK: %[[LABELVA2_0A:.*]] = getelementptr inbounds [2 x i16], [2 x i16]* %[[LABELVA2]], i32 0, i32 0
; CHECK: call i32 (i32, i16, i16*, i16*, ...) @__dfsw_custom4(i32 1, i16 0, i16* %[[LABELVA2_0A]], i16* %[[LABELRETURN]], i32 2, i32 3)
; CHECK: call i32 (i32, i16, i16*, i16*, ...) @__dfsw_custom4(i32 1, i16 zeroext 0, i16* %[[LABELVA2_0A]], i16* %[[LABELRETURN]], i32 2, i32 3)
call i32 (i32, ...) @custom4(i32 1, i32 2, i32 3)
ret void

View File

@ -0,0 +1,54 @@
; RUN: opt -mtriple=x86_64-unknown-linux-gnu < %s -dfsan -S --dfsan-abilist=%S/Inputs/shadow-args-abilist.txt | FileCheck %s
; REQUIRES: x86-registered-target
; Test that the custom abi marks shadow parameters as zero extended.
; Calls dfsan_get_label with one i64 argument. The rewritten call is expected
; to keep the original argument and add one i16 argument marked zeroext,
; followed by an i16* argument.
define i32 @m() {
entry:
%call = call zeroext i16 @dfsan_get_label(i64 signext 56)
%conv = zext i16 %call to i32
ret i32 %conv
}
; CHECK-LABEL: @"dfs$m"
; CHECK: %{{.*}} = call zeroext i16 @__dfsw_dfsan_get_label(i64 signext 56, i16 zeroext 0, i16* %{{.*}})
; Calls k2 with two i64 arguments. The rewritten call is expected to keep them
; and add two i16 arguments, each marked zeroext, followed by an i16* argument.
define i32 @k() {
entry:
%call = call zeroext i16 @k2(i64 signext 56, i64 signext 67)
%conv = zext i16 %call to i32
ret i32 %conv
}
; CHECK-LABEL: @"dfs$k"
; CHECK: %{{.*}} = call zeroext i16 @__dfsw_k2(i64 signext 56, i64 signext 67, i16 zeroext {{.*}}, i16 zeroext {{.*}}, i16* %{{.*}})
; Calls k4 with four i64 arguments. The rewritten call is expected to keep them
; and add four i16 arguments, each marked zeroext, followed by an i16*
; argument.
define i32 @k3() {
entry:
%call = call zeroext i16 @k4(i64 signext 56, i64 signext 67, i64 signext 78, i64 signext 89)
%conv = zext i16 %call to i32
ret i32 %conv
}
; CHECK-LABEL: @"dfs$k3"
; CHECK: %{{.*}} = call zeroext i16 @__dfsw_k4(i64 signext 56, i64 signext 67, i64 signext 78, i64 signext 89, i16 zeroext {{.*}}, i16 zeroext {{.*}}, i16 zeroext {{.*}}, i16 zeroext {{.*}}, i16* %{{.*}})
declare zeroext i16 @dfsan_get_label(i64 signext)
; CHECK-LABEL: @"dfsw$dfsan_get_label"
; CHECK: %{{.*}} = call i16 @__dfsw_dfsan_get_label(i64 %0, i16 zeroext %1, i16* %{{.*}})
declare zeroext i16 @k2(i64 signext, i64 signext)
; CHECK-LABEL: @"dfsw$k2"
; CHECK: %{{.*}} = call i16 @__dfsw_k2(i64 %{{.*}}, i64 %{{.*}}, i16 zeroext %{{.*}}, i16 zeroext %{{.*}}, i16* %{{.*}})
declare zeroext i16 @k4(i64 signext, i64 signext, i64 signext, i64 signext)
; CHECK-LABEL: @"dfsw$k4"
; CHECK: %{{.*}} = call i16 @__dfsw_k4(i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i16 zeroext %{{.*}}, i16 zeroext %{{.*}}, i16 zeroext %{{.*}}, i16 zeroext %{{.*}}, i16* %{{.*}})
; CHECK: declare zeroext i16 @__dfsw_dfsan_get_label(i64 signext, i16, i16*)
; CHECK: declare zeroext i16 @__dfsw_k2(i64 signext, i64 signext, i16, i16, i16*)
; CHECK: declare zeroext i16 @__dfsw_k4(i64 signext, i64 signext, i64 signext, i64 signext, i16, i16, i16, i16, i16*)

View File

@ -0,0 +1,100 @@
; RUN: opt -bdce %s -S | FileCheck %s
; The 'nuw' on the subtract allows us to deduce that %setbit is not demanded.
; But if we change that value to '0', then the 'nuw' is no longer valid. If we don't
; remove the 'nuw', another pass (-instcombine) may make a transform based on
; that incorrect assumption and we can miscompile:
; https://bugs.llvm.org/show_bug.cgi?id=33695
; Input: %x with bit 6 set is shifted left by one, %b (zero-extended) is
; subtracted from it with 'nuw', and only the low bit of the result is returned.
; Expected output: the shl operand is replaced by the constant 0 and the 'nuw'
; flag is no longer present on the sub.
define i1 @PR33695(i1 %b, i8 %x) {
; CHECK-LABEL: @PR33695(
; CHECK-NEXT: [[SETBIT:%.*]] = or i8 %x, 64
; CHECK-NEXT: [[LITTLE_NUMBER:%.*]] = zext i1 %b to i8
; CHECK-NEXT: [[BIG_NUMBER:%.*]] = shl i8 0, 1
; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[BIG_NUMBER]], [[LITTLE_NUMBER]]
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[SUB]] to i1
; CHECK-NEXT: ret i1 [[TRUNC]]
;
%setbit = or i8 %x, 64
%little_number = zext i1 %b to i8
%big_number = shl i8 %setbit, 1
%sub = sub nuw i8 %big_number, %little_number
%trunc = trunc i8 %sub to i1
ret i1 %trunc
}
; Similar to above, but now with more no-wrap.
; https://bugs.llvm.org/show_bug.cgi?id=34037
; Input: a chain of and/xor/or/shl feeding a 'sub nuw nsw' and a 'mul nsw',
; whose product is finally masked with the low 16 bits of %p.
; Expected output: one operand of each of the and, xor, or and shl is replaced
; by the constant 0, and neither the sub nor the mul carries nuw/nsw any more.
define i64 @PR34037(i64 %m, i32 %r, i64 %j, i1 %b, i32 %k, i64 %p) {
; CHECK-LABEL: @PR34037(
; CHECK-NEXT: [[CONV:%.*]] = zext i32 %r to i64
; CHECK-NEXT: [[AND:%.*]] = and i64 %m, 0
; CHECK-NEXT: [[NEG:%.*]] = xor i64 0, 34359738367
; CHECK-NEXT: [[OR:%.*]] = or i64 %j, 0
; CHECK-NEXT: [[SHL:%.*]] = shl i64 0, 29
; CHECK-NEXT: [[CONV1:%.*]] = select i1 %b, i64 7, i64 0
; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[SHL]], [[CONV1]]
; CHECK-NEXT: [[CONV2:%.*]] = zext i32 %k to i64
; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[SUB]], [[CONV2]]
; CHECK-NEXT: [[CONV4:%.*]] = and i64 %p, 65535
; CHECK-NEXT: [[AND5:%.*]] = and i64 [[MUL]], [[CONV4]]
; CHECK-NEXT: ret i64 [[AND5]]
;
%conv = zext i32 %r to i64
%and = and i64 %m, %conv
%neg = xor i64 %and, 34359738367
%or = or i64 %j, %neg
%shl = shl i64 %or, 29
%conv1 = select i1 %b, i64 7, i64 0
%sub = sub nuw nsw i64 %shl, %conv1
%conv2 = zext i32 %k to i64
%mul = mul nsw i64 %sub, %conv2
%conv4 = and i64 %p, 65535
%and5 = and i64 %mul, %conv4
ret i64 %and5
}
; This is a manufactured example based on the 1st test to prove that the
; assumption-killing algorithm stops at the call. I.e., it does not remove
; nsw/nuw from the 'add' because a call demands all bits of its argument.
declare i1 @foo(i1)
; Same or/zext/shl/sub/trunc prefix as @PR33695, but the truncated bit is then
; passed to @foo and the call result feeds an 'add nsw nuw' and a 'mul'.
; Expected output: the shl operand becomes 0 and the sub loses its 'nuw', while
; the add that consumes the call result still carries 'nuw nsw'.
define i1 @poison_on_call_user_is_ok(i1 %b, i8 %x) {
; CHECK-LABEL: @poison_on_call_user_is_ok(
; CHECK-NEXT: [[SETBIT:%.*]] = or i8 %x, 64
; CHECK-NEXT: [[LITTLE_NUMBER:%.*]] = zext i1 %b to i8
; CHECK-NEXT: [[BIG_NUMBER:%.*]] = shl i8 0, 1
; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[BIG_NUMBER]], [[LITTLE_NUMBER]]
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[SUB]] to i1
; CHECK-NEXT: [[CALL_RESULT:%.*]] = call i1 @foo(i1 [[TRUNC]])
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i1 [[CALL_RESULT]], true
; CHECK-NEXT: [[MUL:%.*]] = mul i1 [[TRUNC]], [[ADD]]
; CHECK-NEXT: ret i1 [[MUL]]
;
%setbit = or i8 %x, 64
%little_number = zext i1 %b to i8
%big_number = shl i8 %setbit, 1
%sub = sub nuw i8 %big_number, %little_number
%trunc = trunc i8 %sub to i1
%call_result = call i1 @foo(i1 %trunc)
%add = add nsw nuw i1 %call_result, 1
%mul = mul i1 %trunc, %add
ret i1 %mul
}
; We were asserting that all users of a trivialized integer-type instruction were
; also integer-typed, but that's too strong. The alloca has a pointer-type result.
; Input: a volatile i32 load whose only user is the element count of an alloca
; (a pointer-typed instruction) that is itself unused.
; Expected output: the volatile load remains and the alloca is gone.
define void @PR34179(i32* %a) {
; CHECK-LABEL: @PR34179(
; CHECK-NEXT: [[T0:%.*]] = load volatile i32, i32* %a
; CHECK-NEXT: ret void
;
%t0 = load volatile i32, i32* %a
%vla = alloca i32, i32 %t0
ret void
}

View File

@ -3,15 +3,14 @@
; Check that IndVarSimplify does not replace the exit value, because otherwise
; a udiv would be introduced by expansion and the cost would be high.
;
; CHECK-LABEL: @_Z3fooPKcjj(
; CHECK-NOT: udiv
declare void @_Z3mixRjj(i32* dereferenceable(4), i32)
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
define i32 @_Z3fooPKcjj(i8* nocapture readonly %s, i32 %len, i32 %c) {
; CHECK-LABEL: @_Z3fooPKcjj(
; CHECK-NOT: udiv
entry:
%a = alloca i32, align 4
%tmp = bitcast i32* %a to i8*
@ -50,3 +49,26 @@ while.end: ; preds = %while.cond.while.en
call void @llvm.lifetime.end.p0i8(i64 4, i8* %tmp)
ret i32 %tmp4
}
; Single-block loop whose induction variable starts at 0 and whose backedge is
; taken only while %iv.inc != 1. The value returned after the loop is
; %unknown_phi, which is %unknown_init on entry and a volatile load on every
; later iteration.
define i32 @zero_backedge_count_test(i32 %unknown_init, i32* %unknown_mem) {
; CHECK-LABEL: @zero_backedge_count_test(
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry], [ %iv.inc, %loop ]
%unknown_phi = phi i32 [ %unknown_init, %entry ], [ %unknown_next, %loop ]
%iv.inc = add i32 %iv, 1
; On the first iteration %iv.inc is 1, so this compare is false and the loop
; exits without ever taking the backedge.
%be_taken = icmp ne i32 %iv.inc, 1
%unknown_next = load volatile i32, i32* %unknown_mem
br i1 %be_taken, label %loop, label %leave
leave:
; We can fold %unknown_phi even though the backedge value for it is completely
; unknown, since we can prove that the loop's backedge taken count is 0.
; CHECK: leave:
; CHECK: ret i32 %unknown_init
%exit_val = phi i32 [ %unknown_phi, %loop ]
ret i32 %exit_val
}

View File

@ -0,0 +1,74 @@
; RUN: opt -simplifycfg -S < %s | FileCheck %s
; Just checking for lack of crash here, but we should be able to check the IR?
; Earlier version using auto-generated checks from utils/update_test_checks.py
; had bot problems though...
; Crash reproducer: a void function made only of control flow. It has an outer
; loop (%bb36 -> %bb1), an inner loop between %bb2/%bb3/%bb4, and a long chain
; of conditional branches (%bb7 .. %bb35) that each either continue down the
; chain or bail out to the common block %bb9. Branch conditions are either
; undef or 'icmp ult X, 0' comparisons. Only the function label is matched by
; FileCheck; no instruction-level output is verified.
define void @patatino() {
; CHECK-LABEL: @patatino
br label %bb1
bb1: ; preds = %bb36, %0
br label %bb2
bb2: ; preds = %bb3, %bb1
br i1 undef, label %bb4, label %bb3
bb3: ; preds = %bb4, %bb2
br i1 undef, label %bb2, label %bb5
bb4: ; preds = %bb2
; Switch with an empty case list: the only destination is the default, %bb3.
switch i32 undef, label %bb3 [
]
bb5: ; preds = %bb3
br label %bb6
bb6: ; preds = %bb5
br i1 undef, label %bb7, label %bb9
bb7: ; preds = %bb6
%tmp = or i64 undef, 1
; An unsigned 'less than 0' compare can never be true; the same pattern recurs
; in the blocks below.
%tmp8 = icmp ult i64 %tmp, 0
br i1 %tmp8, label %bb12, label %bb9
bb9: ; preds = %bb35, %bb34, %bb33, %bb32, %bb31, %bb30, %bb27, %bb24, %bb21, %bb18, %bb16, %bb14, %bb12, %bb7, %bb6
br label %bb11
bb10: ; preds = %bb36
br label %bb11
bb11: ; preds = %bb10, %bb9
ret void
bb12: ; preds = %bb7
%tmp13 = icmp ult i64 0, 0
br i1 %tmp13, label %bb14, label %bb9
bb14: ; preds = %bb12
%tmp15 = icmp ult i64 undef, 0
br i1 %tmp15, label %bb16, label %bb9
bb16: ; preds = %bb14
%tmp17 = icmp ult i64 undef, 0
br i1 %tmp17, label %bb18, label %bb9
bb18: ; preds = %bb16
%tmp19 = or i64 undef, 5
%tmp20 = icmp ult i64 %tmp19, 0
br i1 %tmp20, label %bb21, label %bb9
bb21: ; preds = %bb18
%tmp22 = or i64 undef, 6
%tmp23 = icmp ult i64 %tmp22, 0
br i1 %tmp23, label %bb24, label %bb9
bb24: ; preds = %bb21
%tmp25 = or i64 undef, 7
%tmp26 = icmp ult i64 %tmp25, 0
br i1 %tmp26, label %bb27, label %bb9
bb27: ; preds = %bb24
%tmp28 = or i64 undef, 8
%tmp29 = icmp ult i64 %tmp28, 0
br i1 %tmp29, label %bb30, label %bb9
bb30: ; preds = %bb27
br i1 undef, label %bb31, label %bb9
bb31: ; preds = %bb30
br i1 undef, label %bb32, label %bb9
bb32: ; preds = %bb31
br i1 undef, label %bb33, label %bb9
bb33: ; preds = %bb32
br i1 undef, label %bb34, label %bb9
bb34: ; preds = %bb33
br i1 undef, label %bb35, label %bb9
bb35: ; preds = %bb34
br i1 undef, label %bb36, label %bb9
bb36: ; preds = %bb35
br i1 undef, label %bb1, label %bb10
}

View File

@ -871,7 +871,7 @@ static void printRelocationTargetName(const MachOObjectFile *O,
uint64_t Val = O->getPlainRelocationSymbolNum(RE);
if (O->getAnyRelocationType(RE) == MachO::ARM64_RELOC_ADDEND) {
fmt << format("0x%x", Val);
fmt << format("0x%0" PRIx64, Val);
return;
} else if (isExtern) {
symbol_iterator SI = O->symbol_begin();

View File

@ -25,7 +25,7 @@ def __init__(self, progname, path, quiet,
params, config_prefix = None,
maxIndividualTestTime = 0,
maxFailures = None,
parallelism_groups = [],
parallelism_groups = {},
echo_all_commands = False):
# The name of the test runner.
self.progname = progname

View File

@ -313,7 +313,7 @@ def processRedirects(cmd, stdin_source, cmd_shenv, opened_files):
elif op == ('<',):
redirects[0] = [filename, 'r', None]
else:
raise InternalShellError(cmd, "Unsupported redirect: %r" % (r,))
raise InternalShellError(cmd, "Unsupported redirect: %r" % ((op, filename),))
# Open file descriptors in a second pass.
std_fds = [None, None, None]

View File

@ -1,3 +1,8 @@
from lit.formats.base import TestFormat # noqa: F401
from lit.formats.base import ( # noqa: F401
TestFormat,
FileBasedTest,
OneCommandPerFileTest
)
from lit.formats.googletest import GoogleTest # noqa: F401
from lit.formats.shtest import ShTest # noqa: F401

View File

@ -1,50 +1,117 @@
import abc
from __future__ import absolute_import
import os
import lit.Test
import lit.util
class TestFormat(object):
"""Base class for test formats.
pass
A TestFormat encapsulates logic for finding and executing a certain type of
test. For example, a subclass FooTestFormat would contain the logic for
finding tests written in the 'Foo' format, and the logic for running a
single one.
###
TestFormat is an Abstract Base Class (ABC). It uses the Python abc.ABCMeta
type and associated @abc.abstractmethod decorator. Together, these provide
subclass behaviour which is notionally similar to C++ pure virtual classes:
only subclasses which implement all abstract methods can be instantiated
(the implementation may come from an intermediate base).
class FileBasedTest(TestFormat):
def getTestsInDirectory(self, testSuite, path_in_suite,
litConfig, localConfig):
source_path = testSuite.getSourcePath(path_in_suite)
for filename in os.listdir(source_path):
# Ignore dot files and excluded tests.
if (filename.startswith('.') or
filename in localConfig.excludes):
continue
For details on ABCs, see: https://docs.python.org/2/library/abc.html. Note
that Python ABCs have extensive abilities beyond what is used here. For
TestFormat, we only care about enforcing that abstract methods are
implemented.
"""
filepath = os.path.join(source_path, filename)
if not os.path.isdir(filepath):
base,ext = os.path.splitext(filename)
if ext in localConfig.suffixes:
yield lit.Test.Test(testSuite, path_in_suite + (filename,),
localConfig)
__metaclass__ = abc.ABCMeta
###
@abc.abstractmethod
def getTestsInDirectory(self, testSuite, path_in_suite, litConfig,
localConfig):
"""Finds tests of this format in the given directory.
import re
import tempfile
Args:
testSuite: a Test.TestSuite object.
path_in_suite: the subpath under testSuite to look for tests.
litConfig: the LitConfig for the test suite.
localConfig: a LitConfig with local specializations.
class OneCommandPerFileTest(TestFormat):
# FIXME: Refactor into generic test for running some command on a directory
# of inputs.
Returns:
An iterable of Test.Test objects.
"""
def __init__(self, command, dir, recursive=False,
pattern=".*", useTempInput=False):
if isinstance(command, str):
self.command = [command]
else:
self.command = list(command)
if dir is not None:
dir = str(dir)
self.dir = dir
self.recursive = bool(recursive)
self.pattern = re.compile(pattern)
self.useTempInput = useTempInput
def getTestsInDirectory(self, testSuite, path_in_suite,
litConfig, localConfig):
dir = self.dir
if dir is None:
dir = testSuite.getSourcePath(path_in_suite)
for dirname,subdirs,filenames in os.walk(dir):
if not self.recursive:
subdirs[:] = []
subdirs[:] = [d for d in subdirs
if (d != '.svn' and
d not in localConfig.excludes)]
for filename in filenames:
if (filename.startswith('.') or
not self.pattern.match(filename) or
filename in localConfig.excludes):
continue
path = os.path.join(dirname,filename)
suffix = path[len(dir):]
if suffix.startswith(os.sep):
suffix = suffix[1:]
test = lit.Test.Test(
testSuite, path_in_suite + tuple(suffix.split(os.sep)),
localConfig)
# FIXME: Hack?
test.source_path = path
yield test
def createTempInput(self, tmp, test):
raise NotImplementedError('This is an abstract method.')
@abc.abstractmethod
def execute(self, test, litConfig):
"""Runs the given 'test', which is of this format.
if test.config.unsupported:
return (lit.Test.UNSUPPORTED, 'Test is unsupported')
Args:
test: a Test.Test object describing the test to run.
litConfig: the LitConfig for the test suite.
cmd = list(self.command)
Returns:
A tuple of (status:Test.ResultCode, message:str)
"""
# If using temp input, create a temporary file and hand it to the
# subclass.
if self.useTempInput:
tmp = tempfile.NamedTemporaryFile(suffix='.cpp')
self.createTempInput(tmp, test)
tmp.flush()
cmd.append(tmp.name)
elif hasattr(test, 'source_path'):
cmd.append(test.source_path)
else:
cmd.append(test.getSourcePath())
out, err, exitCode = lit.util.executeCommand(cmd)
diags = out + err
if not exitCode and not diags.strip():
return lit.Test.PASS,''
# Try to include some useful information.
report = """Command: %s\n""" % ' '.join(["'%s'" % a
for a in cmd])
if self.useTempInput:
report += """Temporary File: %s\n""" % tmp.name
report += "--\n%s--\n""" % open(tmp.name).read()
report += """Output:\n--\n%s--""" % diags
return lit.Test.FAIL, report

View File

@ -1,13 +1,12 @@
from __future__ import absolute_import
import os
import lit.Test
import lit.TestRunner
import lit.util
from .base import TestFormat
class ShTest(TestFormat):
from .base import FileBasedTest
class ShTest(FileBasedTest):
"""ShTest is a format with one file per test.
This is the primary format for regression tests as described in the LLVM
@ -18,31 +17,9 @@ class ShTest(TestFormat):
The ShTest files contain some number of shell-like command pipelines, along
with assertions about what should be in the output.
"""
def __init__(self, execute_external = False):
"""Initializer.
The 'execute_external' argument controls whether lit uses its internal
logic for command pipelines, or passes the command to a shell
subprocess.
Args:
execute_external: (optional) If true, use shell subprocesses instead
of lit's internal pipeline logic.
"""
def __init__(self, execute_external=False):
self.execute_external = execute_external
def getTestsInDirectory(self, testSuite, path_in_suite,
litConfig, localConfig):
"""Yields test files matching 'suffixes' from the localConfig."""
file_matches = lit.util.listdir_files(
testSuite.getSourcePath(path_in_suite),
localConfig.suffixes, localConfig.excludes)
for filename in file_matches:
yield lit.Test.Test(testSuite, path_in_suite + (filename,),
localConfig)
def execute(self, test, litConfig):
"""Interprets and runs the given test file, and returns the result."""
return lit.TestRunner.executeShTest(test, litConfig,
self.execute_external)

View File

@ -44,6 +44,12 @@ class Run(object):
def __init__(self, lit_config, tests):
self.lit_config = lit_config
self.tests = tests
# Set up semaphores to limit parallelism of certain classes of tests.
# For example, some ASan tests require lots of virtual memory and run
# faster with less parallelism on OS X.
self.parallelism_semaphores = \
{k: multiprocessing.Semaphore(v) for k, v in
self.lit_config.parallelism_groups.items()}
def execute_test(self, test):
return _execute_test_impl(test, self.lit_config,
@ -74,13 +80,6 @@ def execute_tests(self, display, jobs, max_time=None):
if not self.tests or jobs == 0:
return
# Set up semaphores to limit parallelism of certain classes of tests.
# For example, some ASan tests require lots of virtual memory and run
# faster with less parallelism on OS X.
self.parallelism_semaphores = \
{k: multiprocessing.Semaphore(v) for k, v in
self.lit_config.parallelism_groups.items()}
# Install a console-control signal handler on Windows.
if win32api is not None:
def console_ctrl_handler(type):

View File

@ -0,0 +1,6 @@
# lit suite configuration that does not carry its own tests: it names itself
# 'shtest-shell' and points the test source root at the sibling
# '../shtest-shell' directory (relative to this file), run with ShTest.
# NOTE(review): 'config' and 'os' are not defined or imported here; presumably
# they are supplied by lit when it loads this file -- confirm.
import lit.formats
config.name = 'shtest-shell'
# File suffixes that identify test files for this suite.
config.suffixes = ['.txt']
config.test_format = lit.formats.ShTest()
# Borrow the test sources from the neighbouring shtest-shell inputs.
config.test_source_root = os.path.dirname(__file__) + '/../shtest-shell'
config.test_exec_root = None

View File

@ -1,9 +1,9 @@
# Check the behavior of --max-failures option.
#
# RUN: not %{lit} -j 1 -v %{inputs}/shtest-shell > %t.out
# RUN: not %{lit} --max-failures=1 -j 1 -v %{inputs}/shtest-shell >> %t.out
# RUN: not %{lit} --max-failures=2 -j 1 -v %{inputs}/shtest-shell >> %t.out
# RUN: not %{lit} --max-failures=0 -j 1 -v %{inputs}/shtest-shell 2>> %t.out
# RUN: not %{lit} -j 1 -v %{inputs}/max-failures > %t.out
# RUN: not %{lit} --max-failures=1 -j 1 -v %{inputs}/max-failures >> %t.out
# RUN: not %{lit} --max-failures=2 -j 1 -v %{inputs}/max-failures >> %t.out
# RUN: not %{lit} --max-failures=0 -j 1 -v %{inputs}/max-failures 2>> %t.out
# RUN: FileCheck < %t.out %s
#
# END.

View File

@ -9,7 +9,7 @@
# Check that regex-filtering based on environment variables works.
#
# RUN: LIT_FILTER='o[a-z]e' %{lit} %{inputs}/discovery | FileCheck --check-prefix=CHECK-FILTER-ENV %s
# RUN: env LIT_FILTER='o[a-z]e' %{lit} %{inputs}/discovery | FileCheck --check-prefix=CHECK-FILTER-ENV %s
# CHECK-FILTER-ENV: Testing: 2 of 5 tests

View File

@ -403,14 +403,6 @@ function test_llvmCore() {
fi
if [ $do_test_suite = 'yes' ]; then
SandboxDir="$BuildDir/sandbox"
Lit=$SandboxDir/bin/lit
TestSuiteBuildDir="$BuildDir/test-suite-build"
TestSuiteSrcDir="$BuildDir/test-suite.src"
virtualenv $SandboxDir
$SandboxDir/bin/python $BuildDir/llvm.src/utils/lit/setup.py install
mkdir -p $TestSuiteBuildDir
cd $TestSuiteBuildDir
env CC="$c_compiler" CXX="$cxx_compiler" \
cmake $TestSuiteSrcDir -DTEST_SUITE_LIT=$Lit
@ -466,6 +458,19 @@ if [ "$do_checkout" = "yes" ]; then
export_sources
fi
# Setup the test-suite. Do this early so we can catch failures before
# we do the full 3 stage build.
if [ $do_test_suite = "yes" ]; then
SandboxDir="$BuildDir/sandbox"
Lit=$SandboxDir/bin/lit
TestSuiteBuildDir="$BuildDir/test-suite-build"
TestSuiteSrcDir="$BuildDir/test-suite.src"
virtualenv $SandboxDir
$SandboxDir/bin/python $BuildDir/llvm.src/utils/lit/setup.py install
mkdir -p $TestSuiteBuildDir
fi
(
Flavors="Release"
if [ "$do_debug" = "yes" ]; then