Merge llvm, clang, lld, lldb, compiler-rt and libc++ release_70 branch
r339355, resolve conflicts, and bump version numbers.
commit 3beb5372da
@@ -1863,6 +1863,40 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
                                          MaxRecurse))
     return V;
 
+  // Assuming the effective width of Y is not larger than A, i.e. all bits
+  // from X and Y are disjoint in (X << A) | Y,
+  // if the mask of this AND op covers all bits of X or Y, while it covers
+  // no bits from the other, we can bypass this AND op. E.g.,
+  // ((X << A) | Y) & Mask -> Y,
+  //    if Mask = ((1 << effective_width_of(Y)) - 1)
+  // ((X << A) | Y) & Mask -> X << A,
+  //    if Mask = ((1 << effective_width_of(X)) - 1) << A
+  // SimplifyDemandedBits in InstCombine can optimize the general case.
+  // This pattern aims to help other passes for a common case.
+  Value *Y, *XShifted;
+  if (match(Op1, m_APInt(Mask)) &&
+      match(Op0, m_c_Or(m_CombineAnd(m_NUWShl(m_Value(X), m_APInt(ShAmt)),
+                                     m_Value(XShifted)),
+                        m_Value(Y)))) {
+    const unsigned ShftCnt = ShAmt->getZExtValue();
+    const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+    const unsigned Width = Op0->getType()->getScalarSizeInBits();
+    const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros();
+    if (EffWidthY <= ShftCnt) {
+      const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI,
+                                                Q.DT);
+      const unsigned EffWidthX = Width - XKnown.countMinLeadingZeros();
+      const APInt EffBitsY = APInt::getLowBitsSet(Width, EffWidthY);
+      const APInt EffBitsX = APInt::getLowBitsSet(Width, EffWidthX) << ShftCnt;
+      // If the mask is extracting all bits from X or Y as is, we can skip
+      // this AND op.
+      if (EffBitsY.isSubsetOf(*Mask) && !EffBitsX.intersects(*Mask))
+        return Y;
+      if (EffBitsX.isSubsetOf(*Mask) && !EffBitsY.intersects(*Mask))
+        return XShifted;
+    }
+  }
+
   return nullptr;
 }
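Illustration (mine, not part of the diff) of the pattern this new InstSimplify code folds, assuming a no-unsigned-wrap shift and a Y whose effective width fits under the shift amount:

// Hypothetical C analogue: the high and low halves of ((X << 16) | Y)
// are disjoint, so masking with 0xFFFF recovers Y exactly and the 'and'
// can be bypassed; masking with 0xFFFF0000 would recover X << 16.
unsigned lowHalf(unsigned X, unsigned short Y) {
  return ((X << 16) | Y) & 0xFFFFu; // simplifies to (unsigned)Y
}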
@@ -2817,10 +2817,13 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
   default:
     break;
   case Intrinsic::maxnum:
-    return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
-                                           Depth + 1) ||
-           cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
-                                           Depth + 1);
+    return (isKnownNeverNaN(I->getOperand(0)) &&
+            cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI,
+                                            SignBitOnly, Depth + 1)) ||
+           (isKnownNeverNaN(I->getOperand(1)) &&
+            cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI,
+                                            SignBitOnly, Depth + 1));
+
   case Intrinsic::minnum:
     return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
                                            Depth + 1) &&
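Context for the extra isKnownNeverNaN() guards (my gloss, not from the commit message): maxnum returns the other operand when one input is NaN, so a non-negative operand alone does not pin down the sign of the result. C's fmax has the same semantics:

#include <cassert>
#include <cmath>
int main() {
  // NaN compares unordered, so it is trivially "not ordered less than
  // zero", yet maxnum(NaN, -1.0) yields the negative operand.
  assert(std::fmax(NAN, -1.0) == -1.0);
}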
@@ -1489,24 +1489,20 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
 
   // Get the signbit at the right position for MagAsInt.
   int ShiftAmount = SignAsInt.SignBit - MagAsInt.SignBit;
-  if (SignBit.getValueSizeInBits() > ClearedSign.getValueSizeInBits()) {
-    if (ShiftAmount > 0) {
-      SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, IntVT);
-      SignBit = DAG.getNode(ISD::SRL, DL, IntVT, SignBit, ShiftCnst);
-    } else if (ShiftAmount < 0) {
-      SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, IntVT);
-      SignBit = DAG.getNode(ISD::SHL, DL, IntVT, SignBit, ShiftCnst);
-    }
-    SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit);
-  } else if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) {
-    SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit);
-    if (ShiftAmount > 0) {
-      SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, MagVT);
-      SignBit = DAG.getNode(ISD::SRL, DL, MagVT, SignBit, ShiftCnst);
-    } else if (ShiftAmount < 0) {
-      SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, MagVT);
-      SignBit = DAG.getNode(ISD::SHL, DL, MagVT, SignBit, ShiftCnst);
-    }
+  EVT ShiftVT = IntVT;
+  if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) {
+    SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit);
+    ShiftVT = MagVT;
+  }
+  if (ShiftAmount > 0) {
+    SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, ShiftVT);
+    SignBit = DAG.getNode(ISD::SRL, DL, ShiftVT, SignBit, ShiftCnst);
+  } else if (ShiftAmount < 0) {
+    SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, ShiftVT);
+    SignBit = DAG.getNode(ISD::SHL, DL, ShiftVT, SignBit, ShiftCnst);
+  }
+  if (SignBit.getValueSizeInBits() > ClearedSign.getValueSizeInBits()) {
+    SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit);
   }
 
   // Store the part with the modified sign and convert back to float.
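A rough scalar model of what this expansion computes when the sign and magnitude operands have different widths (a sketch under assumed types, not the DAG code itself):

#include <cstdint>
#include <cstring>
// copysign with a float magnitude and a double sign source: isolate the
// sign bit, shift it from bit 63 down to bit 31 (ShiftAmount =
// SignAsInt.SignBit - MagAsInt.SignBit = 32, i.e. SRL then truncate),
// then OR it into the cleared magnitude.
float copysign_f32_f64(float mag, double sign) {
  uint32_t magBits;
  uint64_t signBits;
  std::memcpy(&magBits, &mag, sizeof magBits);
  std::memcpy(&signBits, &sign, sizeof signBits);
  uint64_t signBit = signBits & (1ULL << 63);
  uint32_t moved = (uint32_t)(signBit >> 32); // sign now at bit 31
  magBits = (magBits & 0x7FFFFFFFu) | moved;  // ClearedSign | SignBit
  float out;
  std::memcpy(&out, &magBits, sizeof out);
  return out;
}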
@@ -3641,26 +3641,43 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
 }
 
 SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
-  assert(OpNo == 3 && "Can widen only data operand of mstore");
+  assert((OpNo == 2 || OpNo == 3) &&
+         "Can widen only data or mask operand of mstore");
   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
   SDValue Mask = MST->getMask();
   EVT MaskVT = Mask.getValueType();
   SDValue StVal = MST->getValue();
-  // Widen the value
-  SDValue WideVal = GetWidenedVector(StVal);
   SDLoc dl(N);
 
-  // The mask should be widened as well.
-  EVT WideVT = WideVal.getValueType();
-  EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
-                                    MaskVT.getVectorElementType(),
-                                    WideVT.getVectorNumElements());
-  Mask = ModifyToType(Mask, WideMaskVT, true);
+  if (OpNo == 3) {
+    // Widen the value
+    StVal = GetWidenedVector(StVal);
+
+    // The mask should be widened as well.
+    EVT WideVT = StVal.getValueType();
+    EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+                                      MaskVT.getVectorElementType(),
+                                      WideVT.getVectorNumElements());
+    Mask = ModifyToType(Mask, WideMaskVT, true);
+  } else {
+    EVT WideMaskVT = TLI.getTypeToTransformTo(*DAG.getContext(), MaskVT);
+    Mask = ModifyToType(Mask, WideMaskVT, true);
+
+    EVT ValueVT = StVal.getValueType();
+    if (getTypeAction(ValueVT) == TargetLowering::TypeWidenVector)
+      StVal = GetWidenedVector(StVal);
+    else {
+      EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
+                                    ValueVT.getVectorElementType(),
+                                    WideMaskVT.getVectorNumElements());
+      StVal = ModifyToType(StVal, WideVT);
+    }
+  }
 
   assert(Mask.getValueType().getVectorNumElements() ==
-         WideVal.getValueType().getVectorNumElements() &&
+         StVal.getValueType().getVectorNumElements() &&
          "Mask and data vectors should have the same number of elements");
-  return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
+  return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(),
                             Mask, MST->getMemoryVT(), MST->getMemOperand(),
                             false, MST->isCompressingStore());
 }
@@ -481,34 +481,6 @@ static bool hasPrefix(StringRef SectionName, StringRef Prefix) {
   return SectionName.startswith(Prefix) || SectionName == Prefix.drop_back();
 }
 
-// Return a set of section flags based on the section name that can then
-// be augmented later, otherwise return 0 if we don't have any reasonable
-// defaults.
-static unsigned defaultSectionFlags(StringRef SectionName) {
-
-  if (hasPrefix(SectionName, ".rodata.cst"))
-    return ELF::SHF_ALLOC | ELF::SHF_MERGE;
-
-  if (hasPrefix(SectionName, ".rodata.") || SectionName == ".rodata1")
-    return ELF::SHF_ALLOC;
-
-  if (SectionName == ".fini" || SectionName == ".init" ||
-      hasPrefix(SectionName, ".text."))
-    return ELF::SHF_ALLOC | ELF::SHF_EXECINSTR;
-
-  if (hasPrefix(SectionName, ".data.") || SectionName == ".data1" ||
-      hasPrefix(SectionName, ".bss.") ||
-      hasPrefix(SectionName, ".init_array.") ||
-      hasPrefix(SectionName, ".fini_array.") ||
-      hasPrefix(SectionName, ".preinit_array."))
-    return ELF::SHF_ALLOC | ELF::SHF_WRITE;
-
-  if (hasPrefix(SectionName, ".tdata.") || hasPrefix(SectionName, ".tbss."))
-    return ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_TLS;
-
-  return 0;
-}
-
 bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
   StringRef SectionName;
@@ -518,13 +490,27 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
   StringRef TypeName;
   int64_t Size = 0;
   StringRef GroupName;
+  unsigned Flags = 0;
   const MCExpr *Subsection = nullptr;
   bool UseLastGroup = false;
   MCSymbolELF *Associated = nullptr;
   int64_t UniqueID = ~0;
 
-  // Set the default section flags first in case no others are given.
-  unsigned Flags = defaultSectionFlags(SectionName);
+  // Set the defaults first.
+  if (hasPrefix(SectionName, ".rodata.") || SectionName == ".rodata1")
+    Flags |= ELF::SHF_ALLOC;
+  else if (SectionName == ".fini" || SectionName == ".init" ||
+           hasPrefix(SectionName, ".text."))
+    Flags |= ELF::SHF_ALLOC | ELF::SHF_EXECINSTR;
+  else if (hasPrefix(SectionName, ".data.") || SectionName == ".data1" ||
+           hasPrefix(SectionName, ".bss.") ||
+           hasPrefix(SectionName, ".init_array.") ||
+           hasPrefix(SectionName, ".fini_array.") ||
+           hasPrefix(SectionName, ".preinit_array."))
+    Flags |= ELF::SHF_ALLOC | ELF::SHF_WRITE;
+  else if (hasPrefix(SectionName, ".tdata.") ||
+           hasPrefix(SectionName, ".tbss."))
+    Flags |= ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_TLS;
 
   if (getLexer().is(AsmToken::Comma)) {
     Lex();
@@ -552,12 +538,6 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
 
     if (extraFlags == -1U)
       return TokError("unknown flag");
-
-    // If we found additional section flags on a known section then give a
-    // warning.
-    if (Flags && Flags != extraFlags)
-      Warning(loc, "setting incorrect section attributes for " + SectionName);
-
     Flags |= extraFlags;
 
   bool Mergeable = Flags & ELF::SHF_MERGE;
@@ -267,15 +267,6 @@ def FeatureD16PreservesUnusedBits : SubtargetFeature<
 // Subtarget Features (options and debugging)
 //===------------------------------------------------------------===//
 
-// Some instructions do not support denormals despite this flag. Using
-// fp32 denormals also causes instructions to run at the double
-// precision rate for the device.
-def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
-  "FP32Denormals",
-  "true",
-  "Enable single precision denormal handling"
->;
-
 // Denormal handling for fp64 and fp16 is controlled by the same
 // config register when fp16 supported.
 // TODO: Do we need a separate f16 setting when not legal?
@@ -19,6 +19,15 @@ def FeatureFMA : SubtargetFeature<"fmaf",
   "Enable single precision FMA (not as fast as mul+add, but fused)"
 >;
 
+// Some instructions do not support denormals despite this flag. Using
+// fp32 denormals also causes instructions to run at the double
+// precision rate for the device.
+def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
+  "FP32Denormals",
+  "true",
+  "Enable single precision denormal handling"
+>;
+
 class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
   "localmemorysize"#Value,
   "LocalMemorySize",
@@ -903,7 +903,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                    unsigned DwordOffset) const {
   unsigned ByteOffset = DwordOffset * 4;
   PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
-                                         AMDGPUASI.CONSTANT_BUFFER_0);
+                                         AMDGPUASI.PARAM_I_ADDRESS);
 
   // We shouldn't be using an offset wider than 16-bits for implicit parameters.
   assert(isInt<16>(ByteOffset));
@@ -1457,33 +1457,17 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
     return scalarizeVectorLoad(LoadNode, DAG);
   }
 
+  // This is still used for explicit load from addrspace(8)
   int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
   if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
-    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
-        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
+    if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
-      SDValue Slots[4];
-      for (unsigned i = 0; i < 4; i++) {
-        // We want Const position encoded with the following formula :
-        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
-        // const_index is Ptr computed by llvm using an alignment of 16.
-        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
-        // then div by 4 at the ISel step
-        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
-            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
-        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
-      }
-      EVT NewVT = MVT::v4i32;
-      unsigned NumElements = 4;
-      if (VT.isVector()) {
-        NewVT = VT;
-        NumElements = VT.getVectorNumElements();
-      }
-      Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
+      return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
    } else {
      //TODO: Does this even work?
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
@@ -1622,7 +1606,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
   }
 
   PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
-                                        AMDGPUASI.CONSTANT_BUFFER_0);
+                                        AMDGPUASI.PARAM_I_ADDRESS);
 
   // i64 isn't a legal type, so the register type used ends up as i32, which
   // isn't expected here. It attempts to create this sextload, but it ends up
@@ -1646,17 +1630,17 @@ SDValue R600TargetLowering::LowerFormalArguments(
 
     unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
     unsigned PartOffset = VA.getLocMemOffset();
+    unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);
 
     MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
     SDValue Arg = DAG.getLoad(
         ISD::UNINDEXED, Ext, VT, DL, Chain,
         DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
         PtrInfo,
-        MemVT, /* Alignment = */ 4, MachineMemOperand::MONonTemporal |
+        MemVT, Alignment, MachineMemOperand::MONonTemporal |
                                     MachineMemOperand::MODereferenceable |
                                     MachineMemOperand::MOInvariant);
 
-    // 4 is the preferred alignment for the CONSTANT memory space.
     InVals.push_back(Arg);
   }
   return Chain;
@@ -1804,6 +1788,52 @@ SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
   return BuildVector;
 }
 
+SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
+                                            SelectionDAG &DAG) const {
+  SDLoc DL(LoadNode);
+  EVT VT = LoadNode->getValueType(0);
+  SDValue Chain = LoadNode->getChain();
+  SDValue Ptr = LoadNode->getBasePtr();
+  assert (isa<ConstantSDNode>(Ptr));
+
+  //TODO: Support smaller loads
+  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
+    return SDValue();
+
+  if (LoadNode->getAlignment() < 4)
+    return SDValue();
+
+  int ConstantBlock = ConstantAddressBlock(Block);
+
+  SDValue Slots[4];
+  for (unsigned i = 0; i < 4; i++) {
+    // We want Const position encoded with the following formula :
+    // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
+    // const_index is Ptr computed by llvm using an alignment of 16.
+    // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
+    // then div by 4 at the ISel step
+    SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+        DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
+    Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
+  }
+  EVT NewVT = MVT::v4i32;
+  unsigned NumElements = 4;
+  if (VT.isVector()) {
+    NewVT = VT;
+    NumElements = VT.getVectorNumElements();
+  }
+  SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
+  if (!VT.isVector()) {
+    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
+                         DAG.getConstant(0, DL, MVT::i32));
+  }
+  SDValue MergedValues[2] = {
+    Result,
+    Chain
+  };
+  return DAG.getMergeValues(MergedValues, DL);
+}
+
 //===----------------------------------------------------------------------===//
 // Custom DAG Optimizations
 //===----------------------------------------------------------------------===//
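A worked instance of the encoding comment above (my arithmetic, with assumed example values):

#include <cstdio>
int main() {
  // Assumed values: kc_bank = 0, const_index = 2, chan = 1.
  unsigned kc_bank = 0, const_index = 2, chan = 1;
  unsigned pos = ((512 + (kc_bank << 12) + const_index) << 2) + chan;
  std::printf("%u\n", pos); // prints 2057 = (514 << 2) + 1
}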
@@ -2022,6 +2052,16 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
     NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
     return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
   }
+
+  case ISD::LOAD: {
+    LoadSDNode *LoadNode = cast<LoadSDNode>(N);
+    SDValue Ptr = LoadNode->getBasePtr();
+    if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
+        isa<ConstantSDNode>(Ptr))
+      return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
+    break;
+  }
 
   default: break;
   }
@@ -98,9 +98,11 @@ class R600TargetLowering final : public AMDGPUTargetLowering {
   bool isHWTrueValue(SDValue Op) const;
   bool isHWFalseValue(SDValue Op) const;
 
   bool FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src,
                    SDValue &Neg, SDValue &Abs, SDValue &Sel, SDValue &Imm,
                    SelectionDAG &DAG) const;
+  SDValue constBufferLoad(LoadSDNode *LoadNode, int Block,
+                          SelectionDAG &DAG) const;
 
   SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
 };
@@ -461,17 +461,6 @@ def : GCNPat <
   (inst i16:$src0, i16:$src1, i16:$src2, (i1 0))
 >;
 
-def : GCNPat<
-  (i32 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))),
-  (inst i16:$src0, i16:$src1, i16:$src2, (i1 0))
->;
-
-def : GCNPat<
-  (i64 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))),
-   (REG_SEQUENCE VReg_64,
-     (inst i16:$src0, i16:$src1, i16:$src2, (i1 0)), sub0,
-     (V_MOV_B32_e32 (i32 0)), sub1)
->;
 }
 
 defm: Ternary_i16_Pats<mul, add, V_MAD_U16, zext>;
@@ -16,6 +16,7 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -81,10 +82,12 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
 
   switch (N->getOpcode()) {
   case ISD::LOAD:
+  case ISD::ATOMIC_LOAD:
     if (tryLoad(N))
       return;
     break;
   case ISD::STORE:
+  case ISD::ATOMIC_STORE:
     if (tryStore(N))
      return;
    break;
@@ -834,17 +837,27 @@ static Optional<unsigned> pickOpcodeForVT(
 
 bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
   SDLoc dl(N);
-  LoadSDNode *LD = cast<LoadSDNode>(N);
+  MemSDNode *LD = cast<MemSDNode>(N);
+  assert(LD->readMem() && "Expected load");
+  LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N);
   EVT LoadedVT = LD->getMemoryVT();
   SDNode *NVPTXLD = nullptr;
 
   // do not support pre/post inc/dec
-  if (LD->isIndexed())
+  if (PlainLoad && PlainLoad->isIndexed())
     return false;
 
   if (!LoadedVT.isSimple())
     return false;
 
+  AtomicOrdering Ordering = LD->getOrdering();
+  // In order to lower atomic loads with stronger guarantees we would need to
+  // use load.acquire or insert fences. However these features were only added
+  // with PTX ISA 6.0 / sm_70.
+  // TODO: Check if we can actually use the new instructions and implement them.
+  if (isStrongerThanMonotonic(Ordering))
+    return false;
+
   // Address Space Setting
   unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
   if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
@@ -855,8 +868,9 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
       CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());
 
   // Volatile Setting
-  // - .volatile is only availalble for .global and .shared
-  bool isVolatile = LD->isVolatile();
+  // - .volatile is only available for .global and .shared
+  // - .volatile has the same memory synchronization semantics as .relaxed.sys
+  bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic;
   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
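In source terms (my example, not from the diff), the class of code this enables is a relaxed atomic access, which needs no ordering beyond what the PTX .volatile qualifier already provides:

#include <atomic>
int readFlag(std::atomic<int> &flag) {
  // Monotonic (relaxed) ordering: with this change the NVPTX selector
  // accepts the atomic load and emits it as a .volatile PTX load instead
  // of failing instruction selection.
  return flag.load(std::memory_order_relaxed);
}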
@@ -882,7 +896,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
     fromTypeWidth = 32;
   }
 
-  if ((LD->getExtensionType() == ISD::SEXTLOAD))
+  if (PlainLoad && (PlainLoad->getExtensionType() == ISD::SEXTLOAD))
     fromType = NVPTX::PTXLdStInstCode::Signed;
   else if (ScalarVT.isFloatingPoint())
     // f16 uses .b16 as its storage type.
@@ -1691,25 +1705,38 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
 
 bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
   SDLoc dl(N);
-  StoreSDNode *ST = cast<StoreSDNode>(N);
+  MemSDNode *ST = cast<MemSDNode>(N);
+  assert(ST->writeMem() && "Expected store");
+  StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N);
+  AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N);
+  assert((PlainStore || AtomicStore) && "Expected store");
   EVT StoreVT = ST->getMemoryVT();
   SDNode *NVPTXST = nullptr;
 
   // do not support pre/post inc/dec
-  if (ST->isIndexed())
+  if (PlainStore && PlainStore->isIndexed())
     return false;
 
   if (!StoreVT.isSimple())
     return false;
 
+  AtomicOrdering Ordering = ST->getOrdering();
+  // In order to lower atomic loads with stronger guarantees we would need to
+  // use store.release or insert fences. However these features were only added
+  // with PTX ISA 6.0 / sm_70.
+  // TODO: Check if we can actually use the new instructions and implement them.
+  if (isStrongerThanMonotonic(Ordering))
+    return false;
+
   // Address Space Setting
   unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
   unsigned int PointerSize =
       CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());
 
   // Volatile Setting
-  // - .volatile is only availalble for .global and .shared
-  bool isVolatile = ST->isVolatile();
+  // - .volatile is only available for .global and .shared
+  // - .volatile has the same memory synchronization semantics as .relaxed.sys
+  bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic;
   if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
       CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
       CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
@@ -1739,41 +1766,53 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
     toType = NVPTX::PTXLdStInstCode::Unsigned;
 
   // Create the machine instruction DAG
-  SDValue Chain = N->getOperand(0);
-  SDValue N1 = N->getOperand(1);
-  SDValue N2 = N->getOperand(2);
+  SDValue Chain = ST->getChain();
+  SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
+  SDValue BasePtr = ST->getBasePtr();
   SDValue Addr;
   SDValue Offset, Base;
   Optional<unsigned> Opcode;
-  MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
+  MVT::SimpleValueType SourceVT =
+      Value.getNode()->getSimpleValueType(0).SimpleTy;
 
-  if (SelectDirectAddr(N2, Addr)) {
+  if (SelectDirectAddr(BasePtr, Addr)) {
     Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
                              NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
                              NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
                              NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
     if (!Opcode)
       return false;
-    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
-                      getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
-                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
-                      Chain };
+    SDValue Ops[] = {Value,
+                     getI32Imm(isVolatile, dl),
+                     getI32Imm(CodeAddrSpace, dl),
+                     getI32Imm(vecType, dl),
+                     getI32Imm(toType, dl),
+                     getI32Imm(toTypeWidth, dl),
+                     Addr,
+                     Chain};
     NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
-  } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
-                               : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+  } else if (PointerSize == 64
+                 ? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset)
+                 : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
     Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
                              NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
                              NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
                              NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
     if (!Opcode)
       return false;
-    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
-                      getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
-                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
-                      Offset, Chain };
+    SDValue Ops[] = {Value,
+                     getI32Imm(isVolatile, dl),
+                     getI32Imm(CodeAddrSpace, dl),
+                     getI32Imm(vecType, dl),
+                     getI32Imm(toType, dl),
+                     getI32Imm(toTypeWidth, dl),
+                     Base,
+                     Offset,
+                     Chain};
     NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
-  } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
-                               : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+  } else if (PointerSize == 64
+                 ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
+                 : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
     if (PointerSize == 64)
       Opcode = pickOpcodeForVT(
           SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
@@ -1787,10 +1826,15 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
     if (!Opcode)
       return false;
 
-    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
-                      getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
-                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
-                      Offset, Chain };
+    SDValue Ops[] = {Value,
+                     getI32Imm(isVolatile, dl),
+                     getI32Imm(CodeAddrSpace, dl),
+                     getI32Imm(vecType, dl),
+                     getI32Imm(toType, dl),
+                     getI32Imm(toTypeWidth, dl),
+                     Base,
+                     Offset,
+                     Chain};
     NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
   } else {
     if (PointerSize == 64)
@@ -1806,10 +1850,14 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
                                NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
     if (!Opcode)
       return false;
-    SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
-                      getI32Imm(CodeAddrSpace, dl), getI32Imm(vecType, dl),
-                      getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
-                      Chain };
+    SDValue Ops[] = {Value,
+                     getI32Imm(isVolatile, dl),
+                     getI32Imm(CodeAddrSpace, dl),
+                     getI32Imm(vecType, dl),
+                     getI32Imm(toType, dl),
+                     getI32Imm(toTypeWidth, dl),
+                     BasePtr,
+                     Chain};
     NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
   }
@@ -47,21 +47,17 @@ STATISTIC(ChecksUnable, "Bounds checks unable to add");
 
 using BuilderTy = IRBuilder<TargetFolder>;
 
-/// Adds run-time bounds checks to memory accessing instructions.
+/// Gets the conditions under which memory accessing instructions will overflow.
 ///
 /// \p Ptr is the pointer that will be read/written, and \p InstVal is either
 /// the result from the load or the value being stored. It is used to determine
 /// the size of memory block that is touched.
 ///
-/// \p GetTrapBB is a callable that returns the trap BB to use on failure.
-///
-/// Returns true if any change was made to the IR, false otherwise.
-template <typename GetTrapBBT>
-static bool instrumentMemAccess(Value *Ptr, Value *InstVal,
-                                const DataLayout &DL, TargetLibraryInfo &TLI,
-                                ObjectSizeOffsetEvaluator &ObjSizeEval,
-                                BuilderTy &IRB, GetTrapBBT GetTrapBB,
-                                ScalarEvolution &SE) {
+/// Returns the condition under which the access will overflow.
+static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal,
+                                 const DataLayout &DL, TargetLibraryInfo &TLI,
+                                 ObjectSizeOffsetEvaluator &ObjSizeEval,
+                                 BuilderTy &IRB, ScalarEvolution &SE) {
   uint64_t NeededSize = DL.getTypeStoreSize(InstVal->getType());
   LLVM_DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
                     << " bytes\n");
@@ -70,7 +66,7 @@ static bool instrumentMemAccess(Value *Ptr, Value *InstVal,
 
   if (!ObjSizeEval.bothKnown(SizeOffset)) {
     ++ChecksUnable;
-    return false;
+    return nullptr;
   }
 
   Value *Size = SizeOffset.first;
@@ -107,13 +103,23 @@ static bool instrumentMemAccess(Value *Ptr, Value *InstVal,
     Or = IRB.CreateOr(Cmp1, Or);
   }
 
+  return Or;
+}
+
+/// Adds run-time bounds checks to memory accessing instructions.
+///
+/// \p Or is the condition that should guard the trap.
+///
+/// \p GetTrapBB is a callable that returns the trap BB to use on failure.
+template <typename GetTrapBBT>
+static void insertBoundsCheck(Value *Or, BuilderTy IRB, GetTrapBBT GetTrapBB) {
   // check if the comparison is always false
   ConstantInt *C = dyn_cast_or_null<ConstantInt>(Or);
   if (C) {
     ++ChecksSkipped;
+    // If non-zero, nothing to do.
     if (!C->getZExtValue())
-      return true;
+      return;
   }
   ++ChecksAdded;
@@ -127,12 +133,11 @@ static bool instrumentMemAccess(Value *Ptr, Value *InstVal,
     // FIXME: We should really handle this differently to bypass the splitting
     // the block.
     BranchInst::Create(GetTrapBB(IRB), OldBB);
-    return true;
+    return;
   }
 
   // Create the conditional branch.
   BranchInst::Create(GetTrapBB(IRB), Cont, Or, OldBB);
-  return true;
 }
 
 static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI,
@@ -143,11 +148,25 @@ static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI,
 
   // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
   // touching instructions
-  std::vector<Instruction *> WorkList;
+  SmallVector<std::pair<Instruction *, Value *>, 4> TrapInfo;
   for (Instruction &I : instructions(F)) {
-    if (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<AtomicCmpXchgInst>(I) ||
-        isa<AtomicRMWInst>(I))
-      WorkList.push_back(&I);
+    Value *Or = nullptr;
+    BuilderTy IRB(I.getParent(), BasicBlock::iterator(&I), TargetFolder(DL));
+    if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+      Or = getBoundsCheckCond(LI->getPointerOperand(), LI, DL, TLI,
+                              ObjSizeEval, IRB, SE);
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+      Or = getBoundsCheckCond(SI->getPointerOperand(), SI->getValueOperand(),
+                              DL, TLI, ObjSizeEval, IRB, SE);
+    } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
+      Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getCompareOperand(),
+                              DL, TLI, ObjSizeEval, IRB, SE);
+    } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
+      Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getValOperand(), DL,
+                              TLI, ObjSizeEval, IRB, SE);
+    }
+    if (Or)
+      TrapInfo.push_back(std::make_pair(&I, Or));
   }
 
   // Create a trapping basic block on demand using a callback. Depending on
@@ -176,29 +195,14 @@ static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI,
     return TrapBB;
   };
 
-  bool MadeChange = false;
-  for (Instruction *Inst : WorkList) {
+  // Add the checks.
+  for (const auto &Entry : TrapInfo) {
+    Instruction *Inst = Entry.first;
     BuilderTy IRB(Inst->getParent(), BasicBlock::iterator(Inst), TargetFolder(DL));
-    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
-      MadeChange |= instrumentMemAccess(LI->getPointerOperand(), LI, DL, TLI,
-                                        ObjSizeEval, IRB, GetTrapBB, SE);
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
-      MadeChange |=
-          instrumentMemAccess(SI->getPointerOperand(), SI->getValueOperand(),
-                              DL, TLI, ObjSizeEval, IRB, GetTrapBB, SE);
-    } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst)) {
-      MadeChange |=
-          instrumentMemAccess(AI->getPointerOperand(), AI->getCompareOperand(),
-                              DL, TLI, ObjSizeEval, IRB, GetTrapBB, SE);
-    } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) {
-      MadeChange |=
-          instrumentMemAccess(AI->getPointerOperand(), AI->getValOperand(), DL,
-                              TLI, ObjSizeEval, IRB, GetTrapBB, SE);
-    } else {
-      llvm_unreachable("unknown Instruction type");
-    }
+    insertBoundsCheck(Entry.second, IRB, GetTrapBB);
   }
-  return MadeChange;
+
+  return !TrapInfo.empty();
 }
 
 PreservedAnalyses BoundsCheckingPass::run(Function &F, FunctionAnalysisManager &AM) {
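For orientation (a sketch of mine, not part of the pass), the kind of access this instruments and the guard it builds:

// Conceptually, for each load/store whose object size and offset the
// ObjectSizeOffsetEvaluator can compute, the pass builds the "Or"
// condition (offset out of range, or too little space left for the
// access) and branches to a trap block when it holds:
//   if (Offset > Size || Size - Offset < NeededSize) -> trap
int readElem(int *arr, unsigned idx) {
  return arr[idx]; // guarded access after instrumentation
}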
@@ -1213,9 +1213,13 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
   auto fields = builder.beginStruct();
 
   bool IsOpenCL = CGM.getLangOpts().OpenCL;
+  bool IsWindows = CGM.getTarget().getTriple().isOSWindows();
   if (!IsOpenCL) {
     // isa
-    fields.add(CGM.getNSConcreteGlobalBlock());
+    if (IsWindows)
+      fields.addNullPointer(CGM.Int8PtrPtrTy);
+    else
+      fields.add(CGM.getNSConcreteGlobalBlock());
 
     // __flags
     BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE;
@@ -1250,7 +1254,27 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
 
   llvm::Constant *literal = fields.finishAndCreateGlobal(
       "__block_literal_global", blockInfo.BlockAlign,
-      /*constant*/ true, llvm::GlobalVariable::InternalLinkage, AddrSpace);
+      /*constant*/ !IsWindows, llvm::GlobalVariable::InternalLinkage, AddrSpace);
+
+  // Windows does not allow globals to be initialised to point to globals in
+  // different DLLs. Any such variables must run code to initialise them.
+  if (IsWindows) {
+    auto *Init = llvm::Function::Create(llvm::FunctionType::get(CGM.VoidTy,
+          {}), llvm::GlobalValue::InternalLinkage, ".block_isa_init",
+        &CGM.getModule());
+    llvm::IRBuilder<> b(llvm::BasicBlock::Create(CGM.getLLVMContext(), "entry",
+          Init));
+    b.CreateAlignedStore(CGM.getNSConcreteGlobalBlock(),
+        b.CreateStructGEP(literal, 0), CGM.getPointerAlign().getQuantity());
+    b.CreateRetVoid();
+    // We can't use the normal LLVM global initialisation array, because we
+    // need to specify that this runs early in library initialisation.
+    auto *InitVar = new llvm::GlobalVariable(CGM.getModule(), Init->getType(),
+        /*isConstant*/true, llvm::GlobalValue::InternalLinkage,
+        Init, ".block_isa_init_ptr");
+    InitVar->setSection(".CRT$XCLa");
+    CGM.addUsedGlobal(InitVar);
+  }
 
   // Return a constant of the appropriately-casted type.
   llvm::Type *RequiredType =
@@ -3812,40 +3812,10 @@ llvm::GlobalVariable *CGObjCGNU::ObjCIvarOffsetVariable(
   // is. This allows code compiled with non-fragile ivars to work correctly
   // when linked against code which isn't (most of the time).
   llvm::GlobalVariable *IvarOffsetPointer = TheModule.getNamedGlobal(Name);
-  if (!IvarOffsetPointer) {
-    // This will cause a run-time crash if we accidentally use it. A value of
-    // 0 would seem more sensible, but will silently overwrite the isa pointer
-    // causing a great deal of confusion.
-    uint64_t Offset = -1;
-    // We can't call ComputeIvarBaseOffset() here if we have the
-    // implementation, because it will create an invalid ASTRecordLayout object
-    // that we are then stuck with forever, so we only initialize the ivar
-    // offset variable with a guess if we only have the interface. The
-    // initializer will be reset later anyway, when we are generating the class
-    // description.
-    if (!CGM.getContext().getObjCImplementation(
-          const_cast<ObjCInterfaceDecl *>(ID)))
-      Offset = ComputeIvarBaseOffset(CGM, ID, Ivar);
-
-    llvm::ConstantInt *OffsetGuess = llvm::ConstantInt::get(Int32Ty, Offset,
-                             /*isSigned*/true);
-    // Don't emit the guess in non-PIC code because the linker will not be able
-    // to replace it with the real version for a library. In non-PIC code you
-    // must compile with the fragile ABI if you want to use ivars from a
-    // GCC-compiled class.
-    if (CGM.getLangOpts().PICLevel) {
-      llvm::GlobalVariable *IvarOffsetGV = new llvm::GlobalVariable(TheModule,
-            Int32Ty, false,
-            llvm::GlobalValue::PrivateLinkage, OffsetGuess, Name+".guess");
-      IvarOffsetPointer = new llvm::GlobalVariable(TheModule,
-            IvarOffsetGV->getType(), false, llvm::GlobalValue::LinkOnceAnyLinkage,
-            IvarOffsetGV, Name);
-    } else {
-      IvarOffsetPointer = new llvm::GlobalVariable(TheModule,
-            llvm::Type::getInt32PtrTy(VMContext), false,
-            llvm::GlobalValue::ExternalLinkage, nullptr, Name);
-    }
-  }
+  if (!IvarOffsetPointer)
+    IvarOffsetPointer = new llvm::GlobalVariable(TheModule,
+            llvm::Type::getInt32PtrTy(VMContext), false,
+            llvm::GlobalValue::ExternalLinkage, nullptr, Name);
   return IvarOffsetPointer;
 }
@@ -127,6 +127,7 @@ namespace clang {
           CodeGenOpts, C, CoverageInfo)),
       LinkModules(std::move(LinkModules)) {
       FrontendTimesIsEnabled = TimePasses;
+      llvm::TimePassesIsEnabled = TimePasses;
     }
     llvm::Module *getModule() const { return Gen->GetModule(); }
     std::unique_ptr<llvm::Module> takeModule() {
@@ -154,8 +154,12 @@ struct _Unwind_Control_Block {
 struct _Unwind_Exception {
   _Unwind_Exception_Class exception_class;
   _Unwind_Exception_Cleanup_Fn exception_cleanup;
+#if !defined (__USING_SJLJ_EXCEPTIONS__) && defined (__SEH__)
+  _Unwind_Word private_[6];
+#else
   _Unwind_Word private_1;
   _Unwind_Word private_2;
+#endif
   /* The Itanium ABI requires that _Unwind_Exception objects are "double-word
    * aligned". GCC has interpreted this to mean "use the maximum useful
    * alignment for the target"; so do we. */
@@ -113,9 +113,15 @@ ParsedType Sema::getConstructorName(IdentifierInfo &II,
       break;
     }
   }
-  if (!InjectedClassName && CurClass->isInvalidDecl())
+  if (!InjectedClassName) {
+    if (!CurClass->isInvalidDecl()) {
+      // FIXME: RequireCompleteDeclContext doesn't check dependent contexts
+      // properly. Work around it here for now.
+      Diag(SS.getLastQualifierNameLoc(),
+           diag::err_incomplete_nested_name_spec) << CurClass << SS.getRange();
+    }
     return ParsedType();
-  assert(InjectedClassName && "couldn't find injected class name");
+  }
 
   QualType T = Context.getTypeDeclType(InjectedClassName);
   DiagnoseUseOfDecl(InjectedClassName, NameLoc);
@@ -205,7 +205,13 @@ SectionChunk *ObjFile::readSection(uint32_t SectionNumber,
 
 void ObjFile::readAssociativeDefinition(
     COFFSymbolRef Sym, const coff_aux_section_definition *Def) {
-  SectionChunk *Parent = SparseChunks[Def->getNumber(Sym.isBigObj())];
+  readAssociativeDefinition(Sym, Def, Def->getNumber(Sym.isBigObj()));
+}
+
+void ObjFile::readAssociativeDefinition(COFFSymbolRef Sym,
+                                        const coff_aux_section_definition *Def,
+                                        uint32_t ParentSection) {
+  SectionChunk *Parent = SparseChunks[ParentSection];
 
   // If the parent is pending, it probably means that its section definition
   // appears after us in the symbol table. Leave the associated section as
@@ -225,6 +231,35 @@ void ObjFile::readAssociativeDefinition(
   }
 }
 
+void ObjFile::recordPrevailingSymbolForMingw(
+    COFFSymbolRef Sym, DenseMap<StringRef, uint32_t> &PrevailingSectionMap) {
+  // For comdat symbols in executable sections, where this is the copy
+  // of the section chunk we actually include instead of discarding it,
+  // add the symbol to a map to allow using it for implicitly
+  // associating .[px]data$<func> sections to it.
+  int32_t SectionNumber = Sym.getSectionNumber();
+  SectionChunk *SC = SparseChunks[SectionNumber];
+  if (SC && SC->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) {
+    StringRef Name;
+    COFFObj->getSymbolName(Sym, Name);
+    PrevailingSectionMap[Name] = SectionNumber;
+  }
+}
+
+void ObjFile::maybeAssociateSEHForMingw(
+    COFFSymbolRef Sym, const coff_aux_section_definition *Def,
+    const DenseMap<StringRef, uint32_t> &PrevailingSectionMap) {
+  StringRef Name;
+  COFFObj->getSymbolName(Sym, Name);
+  if (Name.consume_front(".pdata$") || Name.consume_front(".xdata$")) {
+    // For MinGW, treat .[px]data$<func> as implicitly associative to
+    // the symbol <func>.
+    auto ParentSym = PrevailingSectionMap.find(Name);
+    if (ParentSym != PrevailingSectionMap.end())
+      readAssociativeDefinition(Sym, Def, ParentSym->second);
+  }
+}
+
 Symbol *ObjFile::createRegular(COFFSymbolRef Sym) {
   SectionChunk *SC = SparseChunks[Sym.getSectionNumber()];
   if (Sym.isExternal()) {
@@ -248,19 +283,24 @@ void ObjFile::initializeSymbols() {
   std::vector<uint32_t> PendingIndexes;
   PendingIndexes.reserve(NumSymbols);
 
+  DenseMap<StringRef, uint32_t> PrevailingSectionMap;
   std::vector<const coff_aux_section_definition *> ComdatDefs(
       COFFObj->getNumberOfSections() + 1);
 
   for (uint32_t I = 0; I < NumSymbols; ++I) {
     COFFSymbolRef COFFSym = check(COFFObj->getSymbol(I));
+    bool PrevailingComdat;
     if (COFFSym.isUndefined()) {
       Symbols[I] = createUndefined(COFFSym);
     } else if (COFFSym.isWeakExternal()) {
       Symbols[I] = createUndefined(COFFSym);
       uint32_t TagIndex = COFFSym.getAux<coff_aux_weak_external>()->TagIndex;
       WeakAliases.emplace_back(Symbols[I], TagIndex);
-    } else if (Optional<Symbol *> OptSym = createDefined(COFFSym, ComdatDefs)) {
+    } else if (Optional<Symbol *> OptSym =
+                   createDefined(COFFSym, ComdatDefs, PrevailingComdat)) {
       Symbols[I] = *OptSym;
+      if (Config->MinGW && PrevailingComdat)
+        recordPrevailingSymbolForMingw(COFFSym, PrevailingSectionMap);
     } else {
       // createDefined() returns None if a symbol belongs to a section that
       // was pending at the point when the symbol was read. This can happen in
@@ -278,9 +318,12 @@ void ObjFile::initializeSymbols() {
 
   for (uint32_t I : PendingIndexes) {
     COFFSymbolRef Sym = check(COFFObj->getSymbol(I));
-    if (auto *Def = Sym.getSectionDefinition())
+    if (auto *Def = Sym.getSectionDefinition()) {
       if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE)
         readAssociativeDefinition(Sym, Def);
+      else if (Config->MinGW)
+        maybeAssociateSEHForMingw(Sym, Def, PrevailingSectionMap);
+    }
     if (SparseChunks[Sym.getSectionNumber()] == PendingComdat) {
       StringRef Name;
       COFFObj->getSymbolName(Sym, Name);
@@ -306,7 +349,9 @@ Symbol *ObjFile::createUndefined(COFFSymbolRef Sym) {
 
 Optional<Symbol *> ObjFile::createDefined(
     COFFSymbolRef Sym,
-    std::vector<const coff_aux_section_definition *> &ComdatDefs) {
+    std::vector<const coff_aux_section_definition *> &ComdatDefs,
+    bool &Prevailing) {
+  Prevailing = false;
   auto GetName = [&]() {
     StringRef S;
     COFFObj->getSymbolName(Sym, S);
@@ -352,7 +397,6 @@ Optional<Symbol *> ObjFile::createDefined(
   if (const coff_aux_section_definition *Def = ComdatDefs[SectionNumber]) {
     ComdatDefs[SectionNumber] = nullptr;
     Symbol *Leader;
-    bool Prevailing;
     if (Sym.isExternal()) {
       std::tie(Leader, Prevailing) =
          Symtab->addComdat(this, GetName(), Sym.getGeneric());
@@ -13,6 +13,7 @@
 #include "Config.h"
 #include "lld/Common/LLVM.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/LTO/LTO.h"
 #include "llvm/Object/Archive.h"
@@ -157,10 +158,24 @@ class ObjFile : public InputFile {
       COFFSymbolRef COFFSym,
       const llvm::object::coff_aux_section_definition *Def);
 
+  void readAssociativeDefinition(
+      COFFSymbolRef COFFSym,
+      const llvm::object::coff_aux_section_definition *Def,
+      uint32_t ParentSection);
+
+  void recordPrevailingSymbolForMingw(
+      COFFSymbolRef COFFSym,
+      llvm::DenseMap<StringRef, uint32_t> &PrevailingSectionMap);
+
+  void maybeAssociateSEHForMingw(
+      COFFSymbolRef Sym, const llvm::object::coff_aux_section_definition *Def,
+      const llvm::DenseMap<StringRef, uint32_t> &PrevailingSectionMap);
+
   llvm::Optional<Symbol *>
   createDefined(COFFSymbolRef Sym,
                 std::vector<const llvm::object::coff_aux_section_definition *>
-                    &ComdatDefs);
+                    &ComdatDefs,
+                bool &PrevailingComdat);
   Symbol *createRegular(COFFSymbolRef Sym);
   Symbol *createUndefined(COFFSymbolRef Sym);
@@ -116,7 +116,8 @@ void LinkerScript::expandMemoryRegions(uint64_t Size) {
   if (Ctx->MemRegion)
     expandMemoryRegion(Ctx->MemRegion, Size, Ctx->MemRegion->Name,
                        Ctx->OutSec->Name);
-  if (Ctx->LMARegion)
+  // Only expand the LMARegion if it is different from MemRegion.
+  if (Ctx->LMARegion && Ctx->MemRegion != Ctx->LMARegion)
     expandMemoryRegion(Ctx->LMARegion, Size, Ctx->LMARegion->Name,
                        Ctx->OutSec->Name);
 }
@@ -750,6 +751,13 @@ MemoryRegion *LinkerScript::findMemoryRegion(OutputSection *Sec) {
   return nullptr;
 }
 
+static OutputSection *findFirstSection(PhdrEntry *Load) {
+  for (OutputSection *Sec : OutputSections)
+    if (Sec->PtLoad == Load)
+      return Sec;
+  return nullptr;
+}
+
 // This function assigns offsets to input sections and an output section
 // for a single sections command (e.g. ".text { *(.text); }").
 void LinkerScript::assignOffsets(OutputSection *Sec) {
@@ -775,8 +783,11 @@ void LinkerScript::assignOffsets(OutputSection *Sec) {
   // will set the LMA such that the difference between VMA and LMA for the
   // section is the same as the preceding output section in the same region
   // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html
+  // This, however, should only be done by the first "non-header" section
+  // in the segment.
   if (PhdrEntry *L = Ctx->OutSec->PtLoad)
-    L->LMAOffset = Ctx->LMAOffset;
+    if (Sec == findFirstSection(L))
+      L->LMAOffset = Ctx->LMAOffset;
 
   // We can call this method multiple times during the creation of
   // thunks and want to start over calculation each time.
@@ -953,13 +964,6 @@ void LinkerScript::adjustSectionsAfterSorting() {
   }
 }
 
-static OutputSection *findFirstSection(PhdrEntry *Load) {
-  for (OutputSection *Sec : OutputSections)
-    if (Sec->PtLoad == Load)
-      return Sec;
-  return nullptr;
-}
-
 static uint64_t computeBase(uint64_t Min, bool AllocateHeaders) {
   // If there is no SECTIONS or if the linkerscript is explicit about program
   // headers, do our best to allocate them.
@@ -1815,12 +1815,14 @@ template <class ELFT> std::vector<PhdrEntry *> Writer<ELFT>::createPhdrs() {
     // Segments are contiguous memory regions that has the same attributes
     // (e.g. executable or writable). There is one phdr for each segment.
     // Therefore, we need to create a new phdr when the next section has
-    // different flags or is loaded at a discontiguous address using AT linker
-    // script command. At the same time, we don't want to create a separate
-    // load segment for the headers, even if the first output section has
-    // an AT attribute.
+    // different flags or is loaded at a discontiguous address or memory
+    // region using AT or AT> linker script command, respectively. At the same
+    // time, we don't want to create a separate load segment for the headers,
+    // even if the first output section has an AT or AT> attribute.
     uint64_t NewFlags = computeFlags(Sec->getPhdrFlags());
-    if ((Sec->LMAExpr && Load->LastSec != Out::ProgramHeaders) ||
+    if (((Sec->LMAExpr ||
+          (Sec->LMARegion && (Sec->LMARegion != Load->FirstSec->LMARegion))) &&
+         Load->LastSec != Out::ProgramHeaders) ||
         Sec->MemRegion != Load->FirstSec->MemRegion || Flags != NewFlags) {
 
       Load = AddHdr(PT_LOAD, NewFlags);
@@ -87,24 +87,6 @@ class VMRange {
   void Dump(Stream *s, lldb::addr_t base_addr = 0,
             uint32_t addr_width = 8) const;
 
-  class ValueInRangeUnaryPredicate {
-  public:
-    ValueInRangeUnaryPredicate(lldb::addr_t value) : _value(value) {}
-    bool operator()(const VMRange &range) const {
-      return range.Contains(_value);
-    }
-    lldb::addr_t _value;
-  };
-
-  class RangeInRangeUnaryPredicate {
-  public:
-    RangeInRangeUnaryPredicate(VMRange range) : _range(range) {}
-    bool operator()(const VMRange &range) const {
-      return range.Contains(_range);
-    }
-    const VMRange &_range;
-  };
-
   static bool ContainsValue(const VMRange::collection &coll,
                             lldb::addr_t value);
@@ -24,14 +24,16 @@ using namespace lldb_private;
 
 bool VMRange::ContainsValue(const VMRange::collection &coll,
                             lldb::addr_t value) {
-  ValueInRangeUnaryPredicate in_range_predicate(value);
-  return llvm::find_if(coll, in_range_predicate) != coll.end();
+  return llvm::find_if(coll, [&](const VMRange &r) {
+           return r.Contains(value);
+         }) != coll.end();
 }
 
 bool VMRange::ContainsRange(const VMRange::collection &coll,
                             const VMRange &range) {
-  RangeInRangeUnaryPredicate in_range_predicate(range);
-  return llvm::find_if(coll, in_range_predicate) != coll.end();
+  return llvm::find_if(coll, [&](const VMRange &r) {
+           return r.Contains(range);
+         }) != coll.end();
 }
 
 void VMRange::Dump(Stream *s, lldb::addr_t offset, uint32_t addr_width) const {
@@ -1,3 +1,3 @@
 /* $FreeBSD$ */
 
-#define FREEBSD_CC_VERSION 1200015
+#define FREEBSD_CC_VERSION 1200016
@@ -8,4 +8,4 @@
 
 #define CLANG_VENDOR "FreeBSD "
 
-#define SVN_REVISION "338892"
+#define SVN_REVISION "339355"
@@ -7,4 +7,4 @@
 
 #define LLD_REPOSITORY_STRING "FreeBSD"
 // <Upstream revision at import>-<Local identifier in __FreeBSD_version style>
-#define LLD_REVISION_STRING "338892-1200005"
+#define LLD_REVISION_STRING "339355-1200005"