Vendor import of llvm release_40 branch r293443:
https://llvm.org/svn/llvm-project/llvm/branches/release_40@293443
This commit is contained in:
parent
0b1bc8ea84
commit
e1838ca3d9
@ -555,6 +555,8 @@ if(LLVM_USE_SANITIZER)
|
||||
append_common_sanitizer_flags()
|
||||
append("-fsanitize=undefined -fno-sanitize=vptr,function -fno-sanitize-recover=all"
|
||||
CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
append("-fsanitize-blacklist=${CMAKE_SOURCE_DIR}/utils/sanitizers/ubsan_blacklist.txt"
|
||||
CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
elseif (LLVM_USE_SANITIZER STREQUAL "Thread")
|
||||
append_common_sanitizer_flags()
|
||||
append("-fsanitize=thread" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
|
@ -100,6 +100,10 @@ def int_amdgcn_dispatch_id :
|
||||
GCCBuiltin<"__builtin_amdgcn_dispatch_id">,
|
||||
Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
|
||||
|
||||
def int_amdgcn_implicit_buffer_ptr :
|
||||
GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
|
||||
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instruction Intrinsics
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -1191,14 +1191,14 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
|
||||
return MayAlias;
|
||||
|
||||
AliasResult R = aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize,
|
||||
AAMDNodes(), V2, V2Size, V2AAInfo,
|
||||
nullptr, UnderlyingV2);
|
||||
AAMDNodes(), V2, MemoryLocation::UnknownSize,
|
||||
V2AAInfo, nullptr, UnderlyingV2);
|
||||
if (R != MustAlias)
|
||||
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
|
||||
// If V2 is known not to alias GEP base pointer, then the two values
|
||||
// cannot alias per GEP semantics: "A pointer value formed from a
|
||||
// getelementptr instruction is associated with the addresses associated
|
||||
// with the first operand of the getelementptr".
|
||||
// cannot alias per GEP semantics: "Any memory access must be done through
|
||||
// a pointer value associated with an address range of the memory access,
|
||||
// otherwise the behavior is undefined.".
|
||||
return R;
|
||||
|
||||
// If the max search depth is reached the result is undefined
|
||||
|
@ -919,7 +919,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
// If this isn't a LocalAsMetadata record, we're dropping it. This used
|
||||
// to be legal, but there's no upgrade path.
|
||||
auto dropRecord = [&] {
|
||||
MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo++);
|
||||
MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
};
|
||||
if (Record.size() != 2) {
|
||||
dropRecord();
|
||||
@ -934,7 +935,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
|
||||
MetadataList.assignValue(
|
||||
LocalAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_OLD_NODE: {
|
||||
@ -959,7 +961,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
} else
|
||||
Elts.push_back(nullptr);
|
||||
}
|
||||
MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo++);
|
||||
MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_VALUE: {
|
||||
@ -972,7 +975,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
|
||||
MetadataList.assignValue(
|
||||
ValueAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_DISTINCT_NODE:
|
||||
@ -985,7 +989,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
Elts.push_back(getMDOrNull(ID));
|
||||
MetadataList.assignValue(IsDistinct ? MDNode::getDistinct(Context, Elts)
|
||||
: MDNode::get(Context, Elts),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_LOCATION: {
|
||||
@ -999,7 +1004,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
Metadata *InlinedAt = getMDOrNull(Record[4]);
|
||||
MetadataList.assignValue(
|
||||
GET_OR_DISTINCT(DILocation, (Context, Line, Column, Scope, InlinedAt)),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_GENERIC_DEBUG: {
|
||||
@ -1019,7 +1025,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
DwarfOps.push_back(getMDOrNull(Record[I]));
|
||||
MetadataList.assignValue(
|
||||
GET_OR_DISTINCT(GenericDINode, (Context, Tag, Header, DwarfOps)),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_SUBRANGE: {
|
||||
@ -1030,7 +1037,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
MetadataList.assignValue(
|
||||
GET_OR_DISTINCT(DISubrange,
|
||||
(Context, Record[1], unrotateSign(Record[2]))),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_ENUMERATOR: {
|
||||
@ -1041,7 +1049,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
MetadataList.assignValue(
|
||||
GET_OR_DISTINCT(DIEnumerator, (Context, unrotateSign(Record[1]),
|
||||
getMDString(Record[2]))),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_BASIC_TYPE: {
|
||||
@ -1053,7 +1062,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
GET_OR_DISTINCT(DIBasicType,
|
||||
(Context, Record[1], getMDString(Record[2]), Record[3],
|
||||
Record[4], Record[5])),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_DERIVED_TYPE: {
|
||||
@ -1069,7 +1079,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
getDITypeRefOrNull(Record[5]),
|
||||
getDITypeRefOrNull(Record[6]), Record[7], Record[8],
|
||||
Record[9], Flags, getDITypeRefOrNull(Record[11]))),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_COMPOSITE_TYPE: {
|
||||
@ -1134,7 +1145,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
if (!IsNotUsedInTypeRef && Identifier)
|
||||
MetadataList.addTypeRef(*Identifier, *cast<DICompositeType>(CT));
|
||||
|
||||
MetadataList.assignValue(CT, NextMetadataNo++);
|
||||
MetadataList.assignValue(CT, NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_SUBROUTINE_TYPE: {
|
||||
@ -1151,7 +1163,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
|
||||
MetadataList.assignValue(
|
||||
GET_OR_DISTINCT(DISubroutineType, (Context, Flags, CC, Types)),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1165,7 +1178,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
(Context, getMDOrNull(Record[1]),
|
||||
getMDString(Record[2]), getMDString(Record[3]),
|
||||
getMDString(Record[4]), getMDString(Record[5]))),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1181,7 +1195,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
Record.size() == 3 ? DIFile::CSK_None
|
||||
: static_cast<DIFile::ChecksumKind>(Record[3]),
|
||||
Record.size() == 3 ? nullptr : getMDString(Record[4]))),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_COMPILE_UNIT: {
|
||||
@ -1200,7 +1215,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
Record.size() <= 14 ? 0 : Record[14],
|
||||
Record.size() <= 16 ? true : Record[16]);
|
||||
|
||||
MetadataList.assignValue(CU, NextMetadataNo++);
|
||||
MetadataList.assignValue(CU, NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
|
||||
// Upgrade the list of subprograms.
|
||||
if (Metadata *SPs = getMDOrNullWithoutPlaceholders(Record[11]))
|
||||
@ -1247,7 +1263,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
getMDOrNull(Record[16 + Offset]), // declaration
|
||||
getMDOrNull(Record[17 + Offset]) // variables
|
||||
));
|
||||
MetadataList.assignValue(SP, NextMetadataNo++);
|
||||
MetadataList.assignValue(SP, NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
|
||||
// Upgrade sp->function mapping to function->sp mapping.
|
||||
if (HasFn) {
|
||||
@ -1272,7 +1289,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
GET_OR_DISTINCT(DILexicalBlock,
|
||||
(Context, getMDOrNull(Record[1]),
|
||||
getMDOrNull(Record[2]), Record[3], Record[4])),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_LEXICAL_BLOCK_FILE: {
|
||||
@ -1284,7 +1302,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
GET_OR_DISTINCT(DILexicalBlockFile,
|
||||
(Context, getMDOrNull(Record[1]),
|
||||
getMDOrNull(Record[2]), Record[3])),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_NAMESPACE: {
|
||||
@ -1298,7 +1317,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
(Context, getMDOrNull(Record[1]),
|
||||
getMDOrNull(Record[2]), getMDString(Record[3]),
|
||||
Record[4], ExportSymbols)),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_MACRO: {
|
||||
@ -1310,7 +1330,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
GET_OR_DISTINCT(DIMacro,
|
||||
(Context, Record[1], Record[2], getMDString(Record[3]),
|
||||
getMDString(Record[4]))),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_MACRO_FILE: {
|
||||
@ -1322,7 +1343,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
GET_OR_DISTINCT(DIMacroFile,
|
||||
(Context, Record[1], Record[2], getMDOrNull(Record[3]),
|
||||
getMDOrNull(Record[4]))),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_TEMPLATE_TYPE: {
|
||||
@ -1333,7 +1355,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
MetadataList.assignValue(GET_OR_DISTINCT(DITemplateTypeParameter,
|
||||
(Context, getMDString(Record[1]),
|
||||
getDITypeRefOrNull(Record[2]))),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_TEMPLATE_VALUE: {
|
||||
@ -1346,7 +1369,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
(Context, Record[1], getMDString(Record[2]),
|
||||
getDITypeRefOrNull(Record[3]),
|
||||
getMDOrNull(Record[4]))),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_GLOBAL_VAR: {
|
||||
@ -1364,7 +1388,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
getMDOrNull(Record[4]), Record[5],
|
||||
getDITypeRefOrNull(Record[6]), Record[7], Record[8],
|
||||
getMDOrNull(Record[10]), Record[11])),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
} else if (Version == 0) {
|
||||
// Upgrade old metadata, which stored a global variable reference or a
|
||||
// ConstantInt here.
|
||||
@ -1396,7 +1421,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
getMDOrNull(Record[10]), AlignInBits));
|
||||
|
||||
auto *DGVE = DIGlobalVariableExpression::getDistinct(Context, DGV, Expr);
|
||||
MetadataList.assignValue(DGVE, NextMetadataNo++);
|
||||
MetadataList.assignValue(DGVE, NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
if (Attach)
|
||||
Attach->addDebugInfo(DGVE);
|
||||
} else
|
||||
@ -1429,7 +1455,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
getMDOrNull(Record[3 + HasTag]), Record[4 + HasTag],
|
||||
getDITypeRefOrNull(Record[5 + HasTag]),
|
||||
Record[6 + HasTag], Flags, AlignInBits)),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_EXPRESSION: {
|
||||
@ -1446,7 +1473,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
|
||||
MetadataList.assignValue(
|
||||
GET_OR_DISTINCT(DIExpression, (Context, makeArrayRef(Record).slice(1))),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_GLOBAL_VAR_EXPR: {
|
||||
@ -1457,7 +1485,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
MetadataList.assignValue(GET_OR_DISTINCT(DIGlobalVariableExpression,
|
||||
(Context, getMDOrNull(Record[1]),
|
||||
getMDOrNull(Record[2]))),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_OBJC_PROPERTY: {
|
||||
@ -1471,7 +1500,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
getMDOrNull(Record[2]), Record[3],
|
||||
getMDString(Record[4]), getMDString(Record[5]),
|
||||
Record[6], getDITypeRefOrNull(Record[7]))),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_IMPORTED_ENTITY: {
|
||||
@ -1484,7 +1514,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
(Context, Record[1], getMDOrNull(Record[2]),
|
||||
getDITypeRefOrNull(Record[3]), Record[4],
|
||||
getMDString(Record[5]))),
|
||||
NextMetadataNo++);
|
||||
NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_STRING_OLD: {
|
||||
@ -1494,13 +1525,15 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
|
||||
HasSeenOldLoopTags |= mayBeOldLoopAttachmentTag(String);
|
||||
++NumMDStringLoaded;
|
||||
Metadata *MD = MDString::get(Context, String);
|
||||
MetadataList.assignValue(MD, NextMetadataNo++);
|
||||
MetadataList.assignValue(MD, NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_STRINGS: {
|
||||
auto CreateNextMDString = [&](StringRef Str) {
|
||||
++NumMDStringLoaded;
|
||||
MetadataList.assignValue(MDString::get(Context, Str), NextMetadataNo++);
|
||||
MetadataList.assignValue(MDString::get(Context, Str), NextMetadataNo);
|
||||
NextMetadataNo++;
|
||||
};
|
||||
if (Error Err = parseMetadataStrings(Record, Blob, CreateNextMDString))
|
||||
return Err;
|
||||
|
@ -3439,7 +3439,10 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
|
||||
LD->getPointerInfo().getWithOffset(Offset),
|
||||
MinAlign(Align, Increment), MMOFlags, AAInfo);
|
||||
LdChain.push_back(L.getValue(1));
|
||||
if (L->getValueType(0).isVector()) {
|
||||
if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) {
|
||||
// Later code assumes the vector loads produced will be mergeable, so we
|
||||
// must pad the final entry up to the previous width. Scalars are
|
||||
// combined separately.
|
||||
SmallVector<SDValue, 16> Loads;
|
||||
Loads.push_back(L);
|
||||
unsigned size = L->getValueSizeInBits(0);
|
||||
|
@ -85,9 +85,8 @@ def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
|
||||
def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
|
||||
"Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">;
|
||||
|
||||
def FeatureAvoidQuadLdStPairs : SubtargetFeature<"no-quad-ldst-pairs",
|
||||
"AvoidQuadLdStPairs", "true",
|
||||
"Do not form quad load/store pair operations">;
|
||||
def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",
|
||||
"Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">;
|
||||
|
||||
def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
|
||||
"alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
|
||||
@ -222,7 +221,7 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
|
||||
|
||||
def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
|
||||
"Samsung Exynos-M1 processors",
|
||||
[FeatureAvoidQuadLdStPairs,
|
||||
[FeatureSlowPaired128,
|
||||
FeatureCRC,
|
||||
FeatureCrypto,
|
||||
FeatureCustomCheapAsMoveHandling,
|
||||
@ -236,7 +235,7 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
|
||||
|
||||
def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
|
||||
"Samsung Exynos-M2/M3 processors",
|
||||
[FeatureAvoidQuadLdStPairs,
|
||||
[FeatureSlowPaired128,
|
||||
FeatureCRC,
|
||||
FeatureCrypto,
|
||||
FeatureCustomCheapAsMoveHandling,
|
||||
|
@ -1652,7 +1652,7 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
|
||||
return false;
|
||||
|
||||
// On some CPUs quad load/store pairs are slower than two single load/stores.
|
||||
if (Subtarget.avoidQuadLdStPairs()) {
|
||||
if (Subtarget.isPaired128Slow()) {
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
break;
|
||||
|
@ -79,7 +79,7 @@ protected:
|
||||
bool CustomAsCheapAsMove = false;
|
||||
bool UsePostRAScheduler = false;
|
||||
bool Misaligned128StoreIsSlow = false;
|
||||
bool AvoidQuadLdStPairs = false;
|
||||
bool Paired128IsSlow = false;
|
||||
bool UseAlternateSExtLoadCVTF32Pattern = false;
|
||||
bool HasArithmeticBccFusion = false;
|
||||
bool HasArithmeticCbzFusion = false;
|
||||
@ -189,7 +189,7 @@ public:
|
||||
}
|
||||
bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; }
|
||||
bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; }
|
||||
bool avoidQuadLdStPairs() const { return AvoidQuadLdStPairs; }
|
||||
bool isPaired128Slow() const { return Paired128IsSlow; }
|
||||
bool useAlternateSExtLoadCVTF32Pattern() const {
|
||||
return UseAlternateSExtLoadCVTF32Pattern;
|
||||
}
|
||||
|
@ -282,6 +282,12 @@ def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
|
||||
"Enable SI Machine Scheduler"
|
||||
>;
|
||||
|
||||
// Unless +-flat-for-global is specified, turn on FlatForGlobal for
|
||||
// all OS-es on VI and newer hardware to avoid assertion failures due
|
||||
// to missing ADDR64 variants of MUBUF instructions.
|
||||
// FIXME: moveToVALU should be able to handle converting addr64 MUBUF
|
||||
// instructions.
|
||||
|
||||
def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
|
||||
"FlatForGlobal",
|
||||
"true",
|
||||
|
@ -140,7 +140,7 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
|
||||
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
|
||||
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
|
||||
SIProgramInfo KernelInfo;
|
||||
if (STM.isAmdCodeObjectV2()) {
|
||||
if (STM.isAmdCodeObjectV2(*MF)) {
|
||||
getSIProgramInfo(KernelInfo, *MF);
|
||||
EmitAmdKernelCodeT(*MF, KernelInfo);
|
||||
}
|
||||
@ -149,7 +149,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
|
||||
void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
|
||||
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
|
||||
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
|
||||
if (MFI->isKernel() && STM.isAmdCodeObjectV2()) {
|
||||
if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) {
|
||||
AMDGPUTargetStreamer *TS =
|
||||
static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
|
||||
SmallString<128> SymbolName;
|
||||
@ -779,7 +779,7 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
|
||||
|
||||
// FIXME: Should use getKernArgSize
|
||||
header.kernarg_segment_byte_size =
|
||||
STM.getKernArgSegmentSize(MFI->getABIArgOffset());
|
||||
STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset());
|
||||
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
|
||||
header.workitem_vgpr_count = KernelInfo.NumVGPR;
|
||||
header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
|
||||
|
@ -727,14 +727,8 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
|
||||
unsigned Opc
|
||||
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
|
||||
|
||||
// src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
|
||||
// omod
|
||||
SDValue Ops[8];
|
||||
|
||||
SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
|
||||
SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
|
||||
SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
|
||||
CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
|
||||
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
|
||||
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
|
||||
|
@ -2855,6 +2855,9 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
|
||||
SDLoc SL(N);
|
||||
switch (Opc) {
|
||||
case ISD::FADD: {
|
||||
if (!mayIgnoreSignedZero(N0))
|
||||
return SDValue();
|
||||
|
||||
// (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y))
|
||||
SDValue LHS = N0.getOperand(0);
|
||||
SDValue RHS = N0.getOperand(1);
|
||||
@ -2895,6 +2898,9 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
|
||||
}
|
||||
case ISD::FMA:
|
||||
case ISD::FMAD: {
|
||||
if (!mayIgnoreSignedZero(N0))
|
||||
return SDValue();
|
||||
|
||||
// (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z))
|
||||
SDValue LHS = N0.getOperand(0);
|
||||
SDValue MHS = N0.getOperand(1);
|
||||
@ -3272,6 +3278,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
NODE_NAME_CASE(CONST_DATA_PTR)
|
||||
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
|
||||
NODE_NAME_CASE(KILL)
|
||||
NODE_NAME_CASE(DUMMY_CHAIN)
|
||||
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
|
||||
NODE_NAME_CASE(SENDMSG)
|
||||
NODE_NAME_CASE(SENDMSGHALT)
|
||||
|
@ -119,6 +119,16 @@ protected:
|
||||
public:
|
||||
AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
|
||||
|
||||
/// Returns true when the sign of a zero result of \p Op may be ignored.
///
/// This is the case when the target machine options enable UnsafeFPMath, or
/// when \p Op is a BinaryWithFlagsSDNode whose flags include no-signed-zeros.
/// Any other node yields false.
bool mayIgnoreSignedZero(SDValue Op) const {
|
||||
if (getTargetMachine().Options.UnsafeFPMath) // FIXME: nsz only
|
||||
return true;
|
||||
|
||||
// Otherwise only a per-node no-signed-zeros flag can permit it.
if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(Op))
|
||||
return BO->Flags.hasNoSignedZeros();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool isFAbsFree(EVT VT) const override;
|
||||
bool isFNegFree(EVT VT) const override;
|
||||
bool isTruncateFree(EVT Src, EVT Dest) const override;
|
||||
@ -320,6 +330,7 @@ enum NodeType : unsigned {
|
||||
INTERP_P2,
|
||||
PC_ADD_REL_OFFSET,
|
||||
KILL,
|
||||
DUMMY_CHAIN,
|
||||
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
|
||||
STORE_MSKOR,
|
||||
LOAD_CONSTANT,
|
||||
|
@ -54,6 +54,9 @@ def AMDGPUconstdata_ptr : SDNode<
|
||||
// This argument to this node is a dword address.
|
||||
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
|
||||
|
||||
// Force dependencies for vector trunc stores
|
||||
def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>;
|
||||
|
||||
def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
|
||||
def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
|
||||
|
||||
|
@ -48,6 +48,13 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
|
||||
|
||||
ParseSubtargetFeatures(GPU, FullFS);
|
||||
|
||||
// Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
|
||||
// on VI and newer hardware to avoid assertion failures due to missing ADDR64
|
||||
// variants of MUBUF instructions.
|
||||
if (!hasAddr64() && !FS.contains("flat-for-global")) {
|
||||
FlatForGlobal = true;
|
||||
}
|
||||
|
||||
// FIXME: I don't think Evergreen has any useful support for
|
||||
// denormals, but should be checked. Should we issue a warning somewhere
|
||||
// if someone tries to enable these?
|
||||
@ -297,8 +304,9 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
|
||||
return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
|
||||
}
|
||||
|
||||
unsigned SISubtarget::getKernArgSegmentSize(unsigned ExplicitArgBytes) const {
|
||||
unsigned ImplicitBytes = getImplicitArgNumBytes();
|
||||
unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
|
||||
unsigned ExplicitArgBytes) const {
|
||||
unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
|
||||
if (ImplicitBytes == 0)
|
||||
return ExplicitArgBytes;
|
||||
|
||||
|
@ -311,22 +311,31 @@ public:
|
||||
return EnableXNACK;
|
||||
}
|
||||
|
||||
bool isAmdCodeObjectV2() const {
|
||||
return isAmdHsaOS() || isMesa3DOS();
|
||||
/// Returns true when the OS is Mesa3D and the calling convention of \p MF's
/// function is not a shader calling convention (per AMDGPU::isShader).
bool isMesaKernel(const MachineFunction &MF) const {
|
||||
return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv());
|
||||
}
|
||||
|
||||
// Covers VS/PS/CS graphics shaders
// Returns true when the OS is Mesa3D and the calling convention of MF's
// function is a shader calling convention (per AMDGPU::isShader).
|
||||
bool isMesaGfxShader(const MachineFunction &MF) const {
|
||||
return isMesa3DOS() && AMDGPU::isShader(MF.getFunction()->getCallingConv());
|
||||
}
|
||||
|
||||
/// Returns true when the OS is AMD HSA, or when \p MF is a Mesa kernel
/// (as decided by isMesaKernel). The result depends on the function, not
/// only on the subtarget.
bool isAmdCodeObjectV2(const MachineFunction &MF) const {
|
||||
return isAmdHsaOS() || isMesaKernel(MF);
|
||||
}
|
||||
|
||||
/// \brief Returns the offset in bytes from the start of the input buffer
|
||||
/// of the first explicit kernel argument.
|
||||
unsigned getExplicitKernelArgOffset() const {
|
||||
return isAmdCodeObjectV2() ? 0 : 36;
|
||||
unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
|
||||
return isAmdCodeObjectV2(MF) ? 0 : 36;
|
||||
}
|
||||
|
||||
/// Returns the alignment used for the implicit argument pointer: 8 on the
/// AMD HSA OS, 4 otherwise.
/// NOTE(review): the unit is presumably bytes -- confirm against callers.
unsigned getAlignmentForImplicitArgPtr() const {
|
||||
return isAmdHsaOS() ? 8 : 4;
|
||||
}
|
||||
|
||||
unsigned getImplicitArgNumBytes() const {
|
||||
if (isMesa3DOS())
|
||||
unsigned getImplicitArgNumBytes(const MachineFunction &MF) const {
|
||||
if (isMesaKernel(MF))
|
||||
return 16;
|
||||
if (isAmdHsaOS() && isOpenCLEnv())
|
||||
return 32;
|
||||
@ -585,7 +594,7 @@ public:
|
||||
return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
|
||||
}
|
||||
|
||||
unsigned getKernArgSegmentSize(unsigned ExplictArgBytes) const;
|
||||
unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
|
||||
|
||||
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
|
||||
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
|
||||
|
@ -1115,7 +1115,10 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
|
||||
llvm_unreachable("Unsupported private trunc store");
|
||||
}
|
||||
|
||||
SDValue Chain = Store->getChain();
|
||||
SDValue OldChain = Store->getChain();
|
||||
bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
|
||||
// Skip the DUMMY_CHAIN wrapper (if present) and continue from the chain it wraps.
|
||||
SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
|
||||
SDValue BasePtr = Store->getBasePtr();
|
||||
SDValue Offset = Store->getOffset();
|
||||
EVT MemVT = Store->getMemoryVT();
|
||||
@ -1171,7 +1174,15 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
|
||||
|
||||
// Store dword
|
||||
// TODO: Can we be smarter about MachinePointerInfo?
|
||||
return DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
|
||||
SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
|
||||
|
||||
// If we are part of an expanded vector, make our neighbors depend on this store
|
||||
if (VectorTrunc) {
|
||||
// Make all other vector elements depend on this store
|
||||
Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
|
||||
DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
|
||||
}
|
||||
return NewStore;
|
||||
}
|
||||
|
||||
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
@ -1191,6 +1202,17 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
// Neither LOCAL nor PRIVATE can do vectors at the moment
|
||||
if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
|
||||
VT.isVector()) {
|
||||
if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) {
|
||||
// Add an extra level of chain to isolate this vector
|
||||
SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
|
||||
// TODO: can the chain be replaced without creating a new store?
|
||||
SDValue NewStore = DAG.getTruncStore(
|
||||
NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
|
||||
MemVT, StoreNode->getAlignment(),
|
||||
StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
|
||||
StoreNode = cast<StoreSDNode>(NewStore);
|
||||
}
|
||||
|
||||
return scalarizeVectorStore(StoreNode, DAG);
|
||||
}
|
||||
|
||||
@ -1225,7 +1247,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
||||
// Put the mask in correct place
|
||||
SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
|
||||
|
||||
// Put the mask in correct place
|
||||
// Put the value bits in correct place
|
||||
SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
|
||||
SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
|
||||
|
||||
@ -1560,7 +1582,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
|
||||
|
||||
unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
|
||||
unsigned PartOffset = VA.getLocMemOffset();
|
||||
unsigned Offset = Subtarget->getExplicitKernelArgOffset() + VA.getLocMemOffset();
|
||||
unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset();
|
||||
|
||||
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
|
||||
SDValue Arg = DAG.getLoad(
|
||||
|
@ -727,6 +727,20 @@ def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
|
||||
|
||||
def MOV : R600_1OP <0x19, "MOV", []>;
|
||||
|
||||
|
||||
// This is a hack to get rid of DUMMY_CHAIN nodes.
|
||||
// Most DUMMY_CHAINs should be eliminated during legalization, but undef
|
||||
// values can let some slip through to selection.
|
||||
let isPseudo = 1, isCodeGenOnly = 1 in {
|
||||
def DUMMY_CHAIN : AMDGPUInst <
|
||||
(outs),
|
||||
(ins),
|
||||
"DUMMY_CHAIN",
|
||||
[(R600dummy_chain)]
|
||||
>;
|
||||
} // end let isPseudo = 1, isCodeGenOnly = 1
|
||||
|
||||
|
||||
let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
|
||||
|
||||
class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
|
||||
|
@ -237,7 +237,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
|
||||
|
||||
unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
|
||||
if (ST.isAmdCodeObjectV2()) {
|
||||
if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) {
|
||||
PreloadedPrivateBufferReg = TRI->getPreloadedValue(
|
||||
MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
|
||||
}
|
||||
@ -255,7 +255,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
}
|
||||
|
||||
if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
|
||||
assert(ST.isAmdCodeObjectV2());
|
||||
assert(ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF));
|
||||
MRI.addLiveIn(PreloadedPrivateBufferReg);
|
||||
MBB.addLiveIn(PreloadedPrivateBufferReg);
|
||||
}
|
||||
@ -280,6 +280,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
|
||||
bool CopyBuffer = ResourceRegUsed &&
|
||||
PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
|
||||
ST.isAmdCodeObjectV2(MF) &&
|
||||
ScratchRsrcReg != PreloadedPrivateBufferReg;
|
||||
|
||||
// This needs to be careful of the copying order to avoid overwriting one of
|
||||
@ -303,24 +304,57 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
.addReg(PreloadedPrivateBufferReg, RegState::Kill);
|
||||
}
|
||||
|
||||
if (ResourceRegUsed && PreloadedPrivateBufferReg == AMDGPU::NoRegister) {
|
||||
assert(!ST.isAmdCodeObjectV2());
|
||||
if (ResourceRegUsed && (ST.isMesaGfxShader(MF) || (PreloadedPrivateBufferReg == AMDGPU::NoRegister))) {
|
||||
assert(!ST.isAmdCodeObjectV2(MF));
|
||||
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
|
||||
|
||||
unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
|
||||
unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
|
||||
unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
|
||||
unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
|
||||
|
||||
// Use relocations to get the pointer, and setup the other bits manually.
|
||||
uint64_t Rsrc23 = TII->getScratchRsrcWords23();
|
||||
BuildMI(MBB, I, DL, SMovB32, Rsrc0)
|
||||
.addExternalSymbol("SCRATCH_RSRC_DWORD0")
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
|
||||
BuildMI(MBB, I, DL, SMovB32, Rsrc1)
|
||||
.addExternalSymbol("SCRATCH_RSRC_DWORD1")
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
if (MFI->hasPrivateMemoryInputPtr()) {
|
||||
unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
|
||||
|
||||
if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
|
||||
const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
|
||||
|
||||
BuildMI(MBB, I, DL, Mov64, Rsrc01)
|
||||
.addReg(PreloadedPrivateBufferReg)
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
} else {
|
||||
const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
|
||||
|
||||
PointerType *PtrTy =
|
||||
PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()),
|
||||
AMDGPUAS::CONSTANT_ADDRESS);
|
||||
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
|
||||
auto MMO = MF.getMachineMemOperand(PtrInfo,
|
||||
MachineMemOperand::MOLoad |
|
||||
MachineMemOperand::MOInvariant |
|
||||
MachineMemOperand::MODereferenceable,
|
||||
0, 0);
|
||||
BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
|
||||
.addReg(PreloadedPrivateBufferReg)
|
||||
.addImm(0) // offset
|
||||
.addImm(0) // glc
|
||||
.addMemOperand(MMO)
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
}
|
||||
} else {
|
||||
unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
|
||||
unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
|
||||
|
||||
BuildMI(MBB, I, DL, SMovB32, Rsrc0)
|
||||
.addExternalSymbol("SCRATCH_RSRC_DWORD0")
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
|
||||
BuildMI(MBB, I, DL, SMovB32, Rsrc1)
|
||||
.addExternalSymbol("SCRATCH_RSRC_DWORD1")
|
||||
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
|
||||
|
||||
}
|
||||
|
||||
BuildMI(MBB, I, DL, SMovB32, Rsrc2)
|
||||
.addImm(Rsrc23 & 0xffffffff)
|
||||
|
@ -842,7 +842,7 @@ SDValue SITargetLowering::LowerFormalArguments(
|
||||
if (!AMDGPU::isShader(CallConv)) {
|
||||
assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
|
||||
} else {
|
||||
assert(!Info->hasPrivateSegmentBuffer() && !Info->hasDispatchPtr() &&
|
||||
assert(!Info->hasDispatchPtr() &&
|
||||
!Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() &&
|
||||
!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
|
||||
!Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
|
||||
@ -850,6 +850,12 @@ SDValue SITargetLowering::LowerFormalArguments(
|
||||
!Info->hasWorkItemIDZ());
|
||||
}
|
||||
|
||||
if (Info->hasPrivateMemoryInputPtr()) {
|
||||
unsigned PrivateMemoryPtrReg = Info->addPrivateMemoryPtr(*TRI);
|
||||
MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SReg_64RegClass);
|
||||
CCInfo.AllocateReg(PrivateMemoryPtrReg);
|
||||
}
|
||||
|
||||
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
|
||||
if (Info->hasPrivateSegmentBuffer()) {
|
||||
unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
|
||||
@ -908,7 +914,7 @@ SDValue SITargetLowering::LowerFormalArguments(
|
||||
if (VA.isMemLoc()) {
|
||||
VT = Ins[i].VT;
|
||||
EVT MemVT = VA.getLocVT();
|
||||
const unsigned Offset = Subtarget->getExplicitKernelArgOffset() +
|
||||
const unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) +
|
||||
VA.getLocMemOffset();
|
||||
// The first 36 bytes of the input buffer contains information about
|
||||
// thread group and global sizes.
|
||||
@ -1033,7 +1039,7 @@ SDValue SITargetLowering::LowerFormalArguments(
|
||||
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
|
||||
HasStackObjects = true;
|
||||
|
||||
if (ST.isAmdCodeObjectV2()) {
|
||||
if (ST.isAmdCodeObjectV2(MF)) {
|
||||
if (HasStackObjects) {
|
||||
// If we have stack objects, we unquestionably need the private buffer
|
||||
// resource. For the Code Object V2 ABI, this will be the first 4 user
|
||||
@ -2362,9 +2368,13 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
// TODO: Should this propagate fast-math-flags?
|
||||
|
||||
switch (IntrinsicID) {
|
||||
case Intrinsic::amdgcn_implicit_buffer_ptr: {
|
||||
unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
|
||||
return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT);
|
||||
}
|
||||
case Intrinsic::amdgcn_dispatch_ptr:
|
||||
case Intrinsic::amdgcn_queue_ptr: {
|
||||
if (!Subtarget->isAmdCodeObjectV2()) {
|
||||
if (!Subtarget->isAmdCodeObjectV2(MF)) {
|
||||
DiagnosticInfoUnsupported BadIntrin(
|
||||
*MF.getFunction(), "unsupported hsa intrinsic without hsa target",
|
||||
DL.getDebugLoc());
|
||||
|
@ -77,7 +77,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
|
||||
PrivateSegmentWaveByteOffset(false),
|
||||
WorkItemIDX(false),
|
||||
WorkItemIDY(false),
|
||||
WorkItemIDZ(false) {
|
||||
WorkItemIDZ(false),
|
||||
PrivateMemoryInputPtr(false) {
|
||||
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
|
||||
const Function *F = MF.getFunction();
|
||||
|
||||
@ -114,7 +115,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
|
||||
if (HasStackObjects || MaySpill)
|
||||
PrivateSegmentWaveByteOffset = true;
|
||||
|
||||
if (ST.isAmdCodeObjectV2()) {
|
||||
if (ST.isAmdCodeObjectV2(MF)) {
|
||||
if (HasStackObjects || MaySpill)
|
||||
PrivateSegmentBuffer = true;
|
||||
|
||||
@ -126,6 +127,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
|
||||
|
||||
if (F->hasFnAttribute("amdgpu-dispatch-id"))
|
||||
DispatchID = true;
|
||||
} else if (ST.isMesaGfxShader(MF)) {
|
||||
if (HasStackObjects || MaySpill)
|
||||
PrivateMemoryInputPtr = true;
|
||||
}
|
||||
|
||||
// We don't need to worry about accessing spills with flat instructions.
|
||||
@ -182,6 +186,13 @@ unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
|
||||
return FlatScratchInitUserSGPR;
|
||||
}
|
||||
|
||||
// Allocates the private memory pointer input register: selects the
// SReg_64 super-register whose sub0 is the next user SGPR, records it in
// PrivateMemoryPtrUserSGPR, advances NumUserSGPRs by two and returns the
// selected register.
unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
|
||||
PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
|
||||
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
|
||||
// The 64-bit register pair consumes two 32-bit user SGPR slots.
NumUserSGPRs += 2;
|
||||
return PrivateMemoryPtrUserSGPR;
|
||||
}
|
||||
|
||||
SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
|
||||
MachineFunction *MF,
|
||||
unsigned FrameIndex,
|
||||
|
@ -84,6 +84,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
|
||||
unsigned ScratchRSrcReg;
|
||||
unsigned ScratchWaveOffsetReg;
|
||||
|
||||
// Input registers for non-HSA ABI
|
||||
unsigned PrivateMemoryPtrUserSGPR;
|
||||
|
||||
// Input registers setup for the HSA ABI.
|
||||
// User SGPRs in allocation order.
|
||||
unsigned PrivateSegmentBufferUserSGPR;
|
||||
@ -163,6 +166,11 @@ private:
|
||||
bool WorkItemIDY : 1;
|
||||
bool WorkItemIDZ : 1;
|
||||
|
||||
// Private memory buffer
|
||||
// Compute directly in sgpr[0:1]
|
||||
// Other shaders indirect 64-bits at sgpr[0:1]
|
||||
bool PrivateMemoryInputPtr : 1;
|
||||
|
||||
MCPhysReg getNextUserSGPR() const {
|
||||
assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
|
||||
return AMDGPU::SGPR0 + NumUserSGPRs;
|
||||
@ -198,6 +206,7 @@ public:
|
||||
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
|
||||
unsigned addDispatchID(const SIRegisterInfo &TRI);
|
||||
unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
|
||||
unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI);
|
||||
|
||||
// Add system SGPRs.
|
||||
unsigned addWorkGroupIDX() {
|
||||
@ -302,6 +311,10 @@ public:
|
||||
return WorkItemIDZ;
|
||||
}
|
||||
|
||||
// Returns the PrivateMemoryInputPtr flag of this function info.
bool hasPrivateMemoryInputPtr() const {
|
||||
return PrivateMemoryInputPtr;
|
||||
}
|
||||
|
||||
unsigned getNumUserSGPRs() const {
|
||||
return NumUserSGPRs;
|
||||
}
|
||||
@ -338,6 +351,10 @@ public:
|
||||
return QueuePtrUserSGPR;
|
||||
}
|
||||
|
||||
// Returns the register stored in PrivateMemoryPtrUserSGPR.
// NOTE(review): presumably only meaningful after addPrivateMemoryPtr() has
// assigned it -- confirm the field's initial value.
unsigned getPrivateMemoryPtrUserSGPR() const {
|
||||
return PrivateMemoryPtrUserSGPR;
|
||||
}
|
||||
|
||||
bool hasSpilledSGPRs() const {
|
||||
return HasSpilledSGPRs;
|
||||
}
|
||||
|
@ -1108,10 +1108,12 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
|
||||
case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
|
||||
return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
|
||||
case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
|
||||
assert(ST.isAmdCodeObjectV2() &&
|
||||
"Non-CodeObjectV2 ABI currently uses relocations");
|
||||
assert(MFI->hasPrivateSegmentBuffer());
|
||||
return MFI->PrivateSegmentBufferUserSGPR;
|
||||
if (ST.isAmdCodeObjectV2(MF)) {
|
||||
assert(MFI->hasPrivateSegmentBuffer());
|
||||
return MFI->PrivateSegmentBufferUserSGPR;
|
||||
}
|
||||
assert(MFI->hasPrivateMemoryInputPtr());
|
||||
return MFI->PrivateMemoryPtrUserSGPR;
|
||||
case SIRegisterInfo::KERNARG_SEGMENT_PTR:
|
||||
assert(MFI->hasKernargSegmentPtr());
|
||||
return MFI->KernargSegmentPtrUserSGPR;
|
||||
|
@ -70,8 +70,10 @@ class VOP3_Profile<VOPProfile P> : VOPProfile<P.ArgVT> {
|
||||
}
|
||||
|
||||
class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
|
||||
// v_div_scale_{f32|f64} do not support input modifiers.
|
||||
let HasModifiers = 0;
|
||||
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
|
||||
let Asm64 = " $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod";
|
||||
let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
|
||||
}
|
||||
|
||||
def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32> {
|
||||
@ -168,12 +170,14 @@ def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPU
|
||||
def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
|
||||
let SchedRW = [WriteFloatFMA, WriteSALU];
|
||||
let hasExtraSrcRegAllocReq = 1;
|
||||
let AsmMatchConverter = "";
|
||||
}
|
||||
|
||||
// Double precision division pre-scale.
|
||||
def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
|
||||
let SchedRW = [WriteDouble, WriteSALU];
|
||||
let hasExtraSrcRegAllocReq = 1;
|
||||
let AsmMatchConverter = "";
|
||||
}
|
||||
|
||||
def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_msad_u8>;
|
||||
|
@ -164,6 +164,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||
// Emit the rest of the function body.
|
||||
EmitFunctionBody();
|
||||
|
||||
// Emit the XRay table for this function.
|
||||
emitXRayTable();
|
||||
|
||||
// If we need V4T thumb mode Register Indirect Jump pads, emit them.
|
||||
// These are created per function, rather than per TU, since it's
|
||||
// relatively easy to exceed the thumb branch range within a TU.
|
||||
|
@ -7571,11 +7571,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
|
||||
case ISD::MUL: return LowerMUL(Op, DAG);
|
||||
case ISD::SDIV:
|
||||
if (Subtarget->isTargetWindows())
|
||||
if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
|
||||
return LowerDIV_Windows(Op, DAG, /* Signed */ true);
|
||||
return LowerSDIV(Op, DAG);
|
||||
case ISD::UDIV:
|
||||
if (Subtarget->isTargetWindows())
|
||||
if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
|
||||
return LowerDIV_Windows(Op, DAG, /* Signed */ false);
|
||||
return LowerUDIV(Op, DAG);
|
||||
case ISD::ADDC:
|
||||
|
@ -31272,93 +31272,6 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
|
||||
return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
|
||||
}
|
||||
|
||||
/// Check if truncation with saturation from type \p SrcVT to \p DstVT
|
||||
/// is valid for the given \p Subtarget.
|
||||
static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT,
|
||||
const X86Subtarget &Subtarget) {
|
||||
// Nothing to do without AVX-512.
if (!Subtarget.hasAVX512())
|
||||
return false;
|
||||
|
||||
// FIXME: Scalar type may be supported if we move it to vector register.
|
||||
// Only simple vector source types of at most 512 bits are accepted.
if (!SrcVT.isVector() || !SrcVT.isSimple() || SrcVT.getSizeInBits() > 512)
|
||||
return false;
|
||||
|
||||
EVT SrcElVT = SrcVT.getScalarType();
|
||||
EVT DstElVT = DstVT.getScalarType();
|
||||
// Source elements must be 16..64 bits wide, destination elements 8..32 bits.
if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64)
|
||||
return false;
|
||||
if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32)
|
||||
return false;
|
||||
// Sources narrower than 512 bits additionally require VLX; source elements
// narrower than 32 bits additionally require BWI.
if (SrcVT.is512BitVector() || Subtarget.hasVLX())
|
||||
return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Return true if VPACK* instruction can be used for the given types
|
||||
/// and it is available on \p Subtarget.
|
||||
static bool
|
||||
isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) {
|
||||
// Only two exact type pairs are accepted; each is gated on its own feature.
if (Subtarget.hasSSE2())
|
||||
// v16i16 -> v16i8
|
||||
if (SrcVT == MVT::v16i16 && DstVT == MVT::v16i8)
|
||||
return true;
|
||||
if (Subtarget.hasSSE41())
|
||||
// v8i32 -> v8i16
|
||||
if (SrcVT == MVT::v8i32 && DstVT == MVT::v8i16)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Detect a pattern of truncation with saturation:
|
||||
/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
|
||||
/// Return the source value to be truncated or SDValue() if the pattern was not
|
||||
/// matched.
|
||||
/// Note that on a match the returned value is operand 0 of the UMIN node,
/// i.e. the value before clamping.
static SDValue detectUSatPattern(SDValue In, EVT VT) {
|
||||
if (In.getOpcode() != ISD::UMIN)
|
||||
return SDValue();
|
||||
|
||||
// Saturation with truncation. We truncate from InVT to VT.
|
||||
assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() &&
|
||||
"Unexpected types for truncate operation");
|
||||
|
||||
APInt C;
|
||||
// Only a constant splat as the second UMIN operand is recognized.
if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) {
|
||||
// C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according to
|
||||
// the element size of the destination type.
|
||||
return APIntOps::isMask(VT.getScalarSizeInBits(), C) ? In.getOperand(0) :
|
||||
SDValue();
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// Detect a pattern of truncation with saturation:
|
||||
/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
|
||||
/// The types should allow the use of a VPMOVUS* instruction on AVX512.
|
||||
/// Return the source value to be truncated or SDValue() if the pattern was not
|
||||
/// matched.
|
||||
static SDValue detectAVX512USatPattern(SDValue In, EVT VT,
|
||||
const X86Subtarget &Subtarget) {
|
||||
// Reject type combinations the AVX-512 check does not accept before
// attempting the pattern match.
if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
|
||||
return SDValue();
|
||||
return detectUSatPattern(In, VT);
|
||||
}
|
||||
|
||||
/// Try to replace a truncate of the unsigned-saturation pattern matched by
/// detectUSatPattern with a single target node: X86ISD::VTRUNCUS when the
/// AVX-512 type check passes, otherwise X86ISD::PACKUS on the two halves of
/// the matched value when the SSE type check passes. Returns SDValue() if
/// the pattern does not match or neither type check accepts the types.
static SDValue
|
||||
combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget) {
|
||||
SDValue USatVal = detectUSatPattern(In, VT);
|
||||
if (USatVal) {
|
||||
if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
|
||||
return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
|
||||
if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) {
|
||||
// NOTE(review): USatVal is the UMIN's first operand (the unclamped
// value), so PACKUS receives inputs that were never limited to the
// destination range. The PACKUSWB/PACKUSDW instructions are documented
// as saturating *signed* inputs -- confirm this is correct for elements
// with the sign bit set.
SDValue Lo, Hi;
|
||||
std::tie(Lo, Hi) = DAG.SplitVector(USatVal, DL);
|
||||
return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi);
|
||||
}
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// This function detects the AVG pattern between vectors of unsigned i8/i16,
|
||||
/// which is c = (a + b + 1) / 2, and replace this operation with the efficient
|
||||
/// X86ISD::AVG instruction.
|
||||
@ -31925,12 +31838,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
|
||||
St->getPointerInfo(), St->getAlignment(),
|
||||
St->getMemOperand()->getFlags());
|
||||
|
||||
if (SDValue Val =
|
||||
detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget))
|
||||
return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
|
||||
dl, Val, St->getBasePtr(),
|
||||
St->getMemoryVT(), St->getMemOperand(), DAG);
|
||||
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
unsigned NumElems = VT.getVectorNumElements();
|
||||
assert(StVT != VT && "Cannot truncate to the same type");
|
||||
@ -32551,10 +32458,6 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
|
||||
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
|
||||
return Avg;
|
||||
|
||||
// Try to combine truncation with unsigned saturation.
|
||||
if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget))
|
||||
return Val;
|
||||
|
||||
// The bitcast source is a direct mmx result.
|
||||
// Detect bitcasts between i32 to x86mmx
|
||||
if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
|
||||
@ -33790,11 +33693,11 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
|
||||
}
|
||||
}
|
||||
|
||||
// Try to synthesize horizontal adds from adds of shuffles.
|
||||
// Try to synthesize horizontal subs from subs of shuffles.
|
||||
EVT VT = N->getValueType(0);
|
||||
if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
|
||||
(Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
|
||||
isHorizontalBinOp(Op0, Op1, true))
|
||||
isHorizontalBinOp(Op0, Op1, false))
|
||||
return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1);
|
||||
|
||||
return OptimizeConditionalInDecrement(N, DAG);
|
||||
|
@ -1436,6 +1436,14 @@ static bool canSinkInstructions(
|
||||
if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
|
||||
I->getType()->isTokenTy())
|
||||
return false;
|
||||
|
||||
// Conservatively return false if I is an inline-asm instruction. Sinking
|
||||
// and merging inline-asm instructions can potentially create arguments
|
||||
// that cannot satisfy the inline-asm constraints.
|
||||
if (const auto *C = dyn_cast<CallInst>(I))
|
||||
if (C->isInlineAsm())
|
||||
return false;
|
||||
|
||||
// Everything must have only one use too, apart from stores which
|
||||
// have no uses.
|
||||
if (!isa<StoreInst>(I) && !I->hasOneUse())
|
||||
|
19
test/Analysis/BasicAA/pr31761.ll
Normal file
19
test/Analysis/BasicAA/pr31761.ll
Normal file
@ -0,0 +1,19 @@
|
||||
; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
|
||||
|
||||
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.12.0"
|
||||
|
||||
%struct.blam = type { i32, i32 }
|
||||
|
||||
|
||||
; CHECK-DAG: MayAlias: i32* %tmp, i32* %tmp3
|
||||
|
||||
define i1 @ham(%struct.blam* %arg) {
|
||||
%isNull = icmp eq %struct.blam* %arg, null
|
||||
%tmp = getelementptr %struct.blam, %struct.blam* %arg, i64 0, i32 0
|
||||
%tmp2 = getelementptr %struct.blam, %struct.blam* %arg, i64 0, i32 1
|
||||
%select = select i1 %isNull, i32* null, i32* %tmp2
|
||||
%tmp3 = getelementptr i32, i32* %select, i32 -1
|
||||
ret i1 true
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+no-quad-ldst-pairs -verify-machineinstrs -asm-verbose=false | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+slow-paired-128 -verify-machineinstrs -asm-verbose=false | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=aarch64-eabi -mcpu=exynos-m1 -verify-machineinstrs -asm-verbose=false | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: test_nopair_st
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
; On Southern Islands GPUs the local address space(3) uses 32-bit pointers and
|
||||
; the global address space(1) uses 64-bit pointers. These tests check to make sure
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
|
||||
|
||||
; FIXME: Need to handle non-uniform case for function below (load without gep).
|
||||
; GCN-LABEL: {{^}}v_test_add_i16:
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
;FUNC-LABEL: {{^}}test1:
|
||||
|
@ -2,8 +2,8 @@
|
||||
; RUN: llc -mattr=+promote-alloca,-flat-for-global -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-PROMOTE -check-prefix=HSA %s
|
||||
; RUN: llc -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-ALLOCA %s
|
||||
; RUN: llc -mattr=-promote-alloca,-flat-for-global -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-ALLOCA -check-prefix=HSA %s
|
||||
; RUN: llc -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-PROMOTE %s
|
||||
; RUN: llc -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-ALLOCA %s
|
||||
; RUN: llc -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-PROMOTE %s
|
||||
; RUN: llc -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-ALLOCA %s
|
||||
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; Legacy intrinsics that just read implicit parameters
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SICI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SICI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SICI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset:
|
||||
; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_add_local:
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}atomic_sub_local:
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s
|
||||
; RUN: llc -O0 -march=amdgcn -mcpu=tonga -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s
|
||||
; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCNOPT -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCNOPT -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCNOPT -check-prefix=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}test_branch:
|
||||
; GCNNOOPT: v_writelane_b32
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s
|
||||
; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck --check-prefix=SI %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=SI %s
|
||||
|
||||
; BFI_INT Definition pattern from ISA docs
|
||||
; (y & x) | (z & ~x)
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}bfm_pattern:
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; The bitcast should be pushed through the bitcasts so the vectors can
|
||||
; be broken down and the shared components can be CSEd
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
|
||||
|
||||
declare i16 @llvm.bitreverse.i16(i16) #1
|
||||
declare i32 @llvm.bitreverse.i32(i32) #1
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; GCN-LABEL: {{^}}br_cc_f16
|
||||
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; This used to crash because during intermediate control flow lowering, there
|
||||
; was a sequence
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.bswap.i32(i32) nounwind readnone
|
||||
declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) nounwind readnone
|
||||
|
@ -1,7 +1,5 @@
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
|
||||
|
||||
target triple = "amdgcn--"
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: {{^}}main:
|
||||
;
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI
|
||||
; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=SI
|
||||
|
||||
; R600: {{^}}build_vector2:
|
||||
; R600: MOV
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
|
||||
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
|
||||
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; OPT-LABEL: @test_no_sink_flat_small_offset_i32(
|
||||
; OPT: getelementptr i32, i32 addrspace(4)* %in
|
||||
|
@ -1,9 +1,9 @@
|
||||
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
|
||||
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
|
||||
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
|
||||
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; OPT-LABEL: @test_sink_global_small_offset_i32(
|
||||
; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: opt -S -mtriple=amdgcn-- -codegenprepare < %s | FileCheck -check-prefix=OPT %s
|
||||
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -codegenprepare < %s | FileCheck -check-prefix=OPT %s
|
||||
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -codegenprepare < %s | FileCheck -check-prefix=OPT %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; This particular case will actually be worse in terms of code size
|
||||
; from sinking into both.
|
||||
|
@ -1,26 +0,0 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
|
||||
; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
|
||||
; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
|
||||
|
||||
|
||||
; There are no stack objects even though flat is used by default, so
|
||||
; flat_scratch_init should be disabled.
|
||||
|
||||
; ALL-LABEL: {{^}}test:
|
||||
; HSA: .amd_kernel_code_t
|
||||
; HSA: enable_sgpr_flat_scratch_init = 0
|
||||
; HSA: .end_amd_kernel_code_t
|
||||
|
||||
; ALL-NOT: flat_scr
|
||||
|
||||
; HSA-DEFAULT: flat_store_dword
|
||||
; HSA-NODEFAULT: buffer_store_dword
|
||||
|
||||
; NOHSA-DEFAULT: buffer_store_dword
|
||||
; NOHSA-NODEFAULT: flat_store_dword
|
||||
define void @test(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
store i32 0, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}test_concat_v1i32:
|
||||
; 0x80f000 is the high 32 bits of the resource descriptor used by MUBUF
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}fold_mi_v_and_0:
|
||||
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s
|
||||
; RUN: llc -march=amdgcn -mattr=-promote-alloca -verify-machineinstrs < %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca -verify-machineinstrs < %s
|
||||
|
||||
; Test that CopyToReg instructions don't have non-register operands prior
|
||||
; to being emitted.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
declare i7 @llvm.ctlz.i7(i7, i1) nounwind readnone
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC -check-prefix=GCN %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
|
||||
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
|
||||
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
|
||||
|
@ -14,7 +14,7 @@ declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0
|
||||
; GCN-DAG: v_cubesc_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_cubetc_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_cubema_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN: buffer_store_dwordx4
|
||||
; GCN: _store_dwordx4
|
||||
define void @cube(<4 x float> addrspace(1)* %out, float %a, float %b, float %c) #1 {
|
||||
%cubeid = call float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
|
||||
%cubesc = call float @llvm.amdgcn.cubesc(float %a, float %b, float %c)
|
||||
@ -34,7 +34,7 @@ define void @cube(<4 x float> addrspace(1)* %out, float %a, float %b, float %c)
|
||||
; GCN-DAG: v_cubesc_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GCN-DAG: v_cubetc_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GCN-DAG: v_cubema_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GCN: buffer_store_dwordx4
|
||||
; GCN: _store_dwordx4
|
||||
define void @legacy_cube(<4 x float> addrspace(1)* %out, <4 x float> %abcx) #1 {
|
||||
%cube = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %abcx)
|
||||
store <4 x float> %cube, <4 x float> addrspace(1)* %out
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare float @llvm.fabs.f32(float) #1
|
||||
|
@ -1,12 +1,12 @@
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TONGA %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=carrizo -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=carrizo -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s
|
||||
; RUN: llc < %s -march=amdgcn -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s
|
||||
; RUN: llc < %s -march=amdgcn -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TONGA %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=carrizo -mattr=-flat-for-global -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=carrizo -mattr=-flat-for-global -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s
|
||||
|
||||
; Test that we don't try to produce a COFF file on windows
|
||||
; RUN: llc < %s -mtriple=amdgcn-pc-mingw -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s
|
||||
; RUN: llc < %s -mtriple=amdgcn-pc-mingw -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s
|
||||
|
||||
; ELF: Format: ELF64
|
||||
; ELF: OS/ABI: AMDGPU_HSA (0x40)
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC %s
|
||||
; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=CI-HSA -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; FIXME: This seems to not ever actually become an extload
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}extract_vector_elt_v3f64_2:
|
||||
; GCN: buffer_load_dwordx4
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; GCN-LABEL: {{^}}extract_vector_elt_v2i16:
|
||||
; GCN: buffer_load_ushort
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; How the replacement of i64 stores with v2i32 stores resulted in
|
||||
; breaking other users of the bitcast if they already existed
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}extract_vector_elt_v1i8:
|
||||
; GCN: buffer_load_ubyte
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
|
||||
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; GCN-LABEL: {{^}}fadd_f16
|
||||
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
|
||||
|
||||
; FUNC-LABEL: {{^}}fadd_f32:
|
||||
|
@ -23,7 +23,7 @@ define void @s_fadd_f64(double addrspace(1)* %out, double %r0, double %r1) {
|
||||
; CHECK-LABEL: {{^}}v_fadd_v2f64:
|
||||
; CHECK: v_add_f64
|
||||
; CHECK: v_add_f64
|
||||
; CHECK: buffer_store_dwordx4
|
||||
; CHECK: _store_dwordx4
|
||||
define void @v_fadd_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
|
||||
<2 x double> addrspace(1)* %in2) {
|
||||
%r0 = load <2 x double>, <2 x double> addrspace(1)* %in1
|
||||
@ -36,7 +36,7 @@ define void @v_fadd_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspac
|
||||
; CHECK-LABEL: {{^}}s_fadd_v2f64:
|
||||
; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
|
||||
; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
|
||||
; CHECK: buffer_store_dwordx4
|
||||
; CHECK: _store_dwordx4
|
||||
define void @s_fadd_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %r0, <2 x double> %r1) {
|
||||
%r2 = fadd <2 x double> %r0, %r1
|
||||
store <2 x double> %r2, <2 x double> addrspace(1)* %out
|
||||
|
@ -167,6 +167,6 @@ define void @test_fold_canonicalize_snan3_value_f16(half addrspace(1)* %out) #1
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
attributes #2 = { nounwind "target-features"="-fp16-denormals,-fp16-denormals" }
|
||||
attributes #3 = { nounwind "target-features"="+fp16-denormals,+fp64-denormals" }
|
||||
attributes #1 = { nounwind "target-features"="-flat-for-global" }
|
||||
attributes #2 = { nounwind "target-features"="-flat-for-global,-fp16-denormals,-fp16-denormals" }
|
||||
attributes #3 = { nounwind "target-features"="-flat-for-global,+fp16-denormals,+fp64-denormals" }
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
declare float @llvm.ceil.f32(float) nounwind readnone
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; GCN-LABEL: {{^}}fcmp_f16_lt
|
||||
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
|
||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: {{^}}flt_f64:
|
||||
; CHECK: v_cmp_nge_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
declare float @llvm.copysign.f32(float, float) nounwind readnone
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
|
||||
|
||||
declare double @llvm.copysign.f64(double, double) nounwind readnone
|
||||
declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; Make sure fdiv is promoted to f32.
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=COMMON %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=COMMON %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=COMMON %s
|
||||
|
||||
|
||||
; COMMON-LABEL: {{^}}fdiv_f64:
|
||||
|
@ -252,8 +252,8 @@ define void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> ad
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="-fp32-denormals" }
|
||||
attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "target-features"="-fp32-denormals" }
|
||||
attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="+fp32-denormals" }
|
||||
attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="-fp32-denormals,-flat-for-global" }
|
||||
attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "target-features"="-fp32-denormals,-flat-for-global" }
|
||||
attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="+fp32-denormals,-flat-for-global" }
|
||||
|
||||
!0 = !{float 2.500000e+00}
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
|
||||
|
||||
declare double @llvm.fabs.f64(double %Val)
|
||||
declare double @llvm.floor.f64(double) nounwind readnone
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}floor_f32:
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck %s
|
||||
; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga < %s | FileCheck %s
|
||||
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=CHECK,HSA %s
|
||||
; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck %s
|
||||
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA %s
|
||||
|
||||
; Disable optimizations in case there are optimizations added that
|
||||
; specialize away generic pointer accesses.
|
||||
|
54
test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll
Normal file
54
test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll
Normal file
@ -0,0 +1,54 @@
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
|
||||
; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s
|
||||
; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
|
||||
; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
|
||||
; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s
|
||||
|
||||
|
||||
; There are no stack objects even though flat is used by default, so
|
||||
; flat_scratch_init should be disabled.
|
||||
|
||||
; ALL-LABEL: {{^}}test:
|
||||
; HSA: .amd_kernel_code_t
|
||||
; HSA: enable_sgpr_flat_scratch_init = 0
|
||||
; HSA: .end_amd_kernel_code_t
|
||||
|
||||
; ALL-NOT: flat_scr
|
||||
|
||||
; HSA-DEFAULT: flat_store_dword
|
||||
; HSA-NODEFAULT: buffer_store_dword
|
||||
; HSA-NOADDR64: flat_store_dword
|
||||
|
||||
; NOHSA-DEFAULT: buffer_store_dword
|
||||
; NOHSA-NODEFAULT: flat_store_dword
|
||||
; NOHSA-NOADDR64: flat_store_dword
|
||||
define void @test(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
store i32 0, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; HSA-DEFAULT: flat_store_dword
|
||||
; HSA-NODEFAULT: buffer_store_dword
|
||||
; HSA-NOADDR64: flat_store_dword
|
||||
|
||||
; NOHSA-DEFAULT: buffer_store_dword
|
||||
; NOHSA-NODEFAULT: flat_store_dword
|
||||
; NOHSA-NOADDR64: flat_store_dword
|
||||
define void @test_addr64(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%out.addr = alloca i32 addrspace(1)*, align 4
|
||||
|
||||
store i32 addrspace(1)* %out, i32 addrspace(1)** %out.addr, align 4
|
||||
%ld0 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
|
||||
|
||||
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %ld0, i32 0
|
||||
store i32 1, i32 addrspace(1)* %arrayidx, align 4
|
||||
|
||||
%ld1 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
|
||||
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %ld1, i32 1
|
||||
store i32 2, i32 addrspace(1)* %arrayidx1, align 4
|
||||
|
||||
ret void
|
||||
}
|
@ -5,7 +5,7 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
|
||||
|
||||
; RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=stoney -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=stoney -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s
|
||||
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-CI -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-VI-NOXNACK -check-prefix=GCN %s
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare double @llvm.fma.f64(double, double, double) nounwind readnone
|
||||
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare double @llvm.maxnum.f64(double, double) nounwind readnone
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare float @llvm.maxnum.f32(float, float) nounwind readnone
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare double @llvm.maxnum.f64(double, double) #0
|
||||
declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) #0
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare float @llvm.maxnum.f32(float, float) #0
|
||||
declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #0
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare float @llvm.minnum.f32(float, float) nounwind readnone
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
declare float @llvm.minnum.f32(float, float) #0
|
||||
|
@ -1,5 +1,5 @@
|
||||
; XUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't
|
||||
; add an instruction if the fadd has more than one use.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; GCN-LABEL: {{^}}fmul_f16
|
||||
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}fmul_f32:
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user