Vendor import of llvm release_39 branch r288513:
https://llvm.org/svn/llvm-project/llvm/branches/release_39@288513
This commit is contained in:
parent
6449741f4c
commit
2cf3bd4601
@ -20,11 +20,11 @@
|
||||
#include <ciso646> // So we can check the C++ standard lib macros.
|
||||
#include <functional>
|
||||
|
||||
// We use std::call_once on all Unix platforms except for NetBSD with
|
||||
// libstdc++. That platform has a bug they are working to fix, and they'll
|
||||
// remove the NetBSD checks once fixed.
|
||||
#if defined(LLVM_ON_UNIX) && \
|
||||
!(defined(__NetBSD__) && !defined(_LIBCPP_VERSION)) && !defined(__ppc__)
|
||||
// std::call_once from libc++ is used on all Unix platforms. Other
|
||||
// implementations like libstdc++ are known to have problems on NetBSD,
|
||||
// OpenBSD and PowerPC.
|
||||
#if defined(LLVM_ON_UNIX) && (defined(_LIBCPP_VERSION) || \
|
||||
!(defined(__NetBSD__) || defined(__OpenBSD__) || defined(__ppc__)))
|
||||
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
|
||||
#else
|
||||
#define LLVM_THREADING_USE_STD_CALL_ONCE 0
|
||||
|
@ -2185,24 +2185,29 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
|
||||
// options. This is a trivially-generalized version of the code from
|
||||
// Hacker's Delight (itself derived from Knuth's Algorithm M from section
|
||||
// 4.3.1).
|
||||
SDValue Mask =
|
||||
DAG.getConstant(APInt::getLowBitsSet(NVT.getSizeInBits(),
|
||||
NVT.getSizeInBits() >> 1), dl, NVT);
|
||||
unsigned Bits = NVT.getSizeInBits();
|
||||
unsigned HalfBits = Bits >> 1;
|
||||
SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl,
|
||||
NVT);
|
||||
SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask);
|
||||
SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask);
|
||||
|
||||
SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL);
|
||||
SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
|
||||
|
||||
SDValue Shift =
|
||||
DAG.getConstant(NVT.getSizeInBits() >> 1, dl,
|
||||
TLI.getShiftAmountTy(NVT, DAG.getDataLayout()));
|
||||
EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
|
||||
if (APInt::getMaxValue(ShiftAmtTy.getSizeInBits()).ult(HalfBits)) {
|
||||
// The type from TLI is too small to fit the shift amount we want.
|
||||
// Override it with i32. The shift will have to be legalized.
|
||||
ShiftAmtTy = MVT::i32;
|
||||
}
|
||||
SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy);
|
||||
SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
|
||||
SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
|
||||
SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift);
|
||||
|
||||
SDValue U = DAG.getNode(ISD::ADD, dl, NVT,
|
||||
DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TL);
|
||||
DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TH);
|
||||
SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask);
|
||||
SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift);
|
||||
|
||||
@ -2211,14 +2216,14 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
|
||||
SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift);
|
||||
|
||||
SDValue W = DAG.getNode(ISD::ADD, dl, NVT,
|
||||
DAG.getNode(ISD::MUL, dl, NVT, LL, RL),
|
||||
DAG.getNode(ISD::MUL, dl, NVT, LLH, RLH),
|
||||
DAG.getNode(ISD::ADD, dl, NVT, UH, VH));
|
||||
Lo = DAG.getNode(ISD::ADD, dl, NVT, TH,
|
||||
Lo = DAG.getNode(ISD::ADD, dl, NVT, TL,
|
||||
DAG.getNode(ISD::SHL, dl, NVT, V, Shift));
|
||||
|
||||
Hi = DAG.getNode(ISD::ADD, dl, NVT, W,
|
||||
DAG.getNode(ISD::ADD, dl, NVT,
|
||||
DAG.getNode(ISD::MUL, dl, NVT, RH, LL),
|
||||
DAG.getNode(ISD::MUL, dl, NVT, RH, LL),
|
||||
DAG.getNode(ISD::MUL, dl, NVT, RL, LH)));
|
||||
return;
|
||||
}
|
||||
|
@ -2203,7 +2203,8 @@ void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
|
||||
}
|
||||
|
||||
void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
|
||||
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
|
||||
MachineFunction &MF = *MI.getParent()->getParent();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
|
||||
// Legalize VOP2
|
||||
if (isVOP2(MI) || isVOPC(MI)) {
|
||||
@ -2321,8 +2322,14 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
|
||||
return;
|
||||
}
|
||||
|
||||
// Legalize MIMG
|
||||
if (isMIMG(MI)) {
|
||||
// Legalize MIMG and MUBUF/MTBUF for shaders.
|
||||
//
|
||||
// Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
|
||||
// scratch memory access. In both cases, the legalization never involves
|
||||
// conversion to the addr64 form.
|
||||
if (isMIMG(MI) ||
|
||||
(AMDGPU::isShader(MF.getFunction()->getCallingConv()) &&
|
||||
(isMUBUF(MI) || isMTBUF(MI)))) {
|
||||
MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
|
||||
if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) {
|
||||
unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI);
|
||||
@ -2337,9 +2344,10 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
|
||||
return;
|
||||
}
|
||||
|
||||
// Legalize MUBUF* instructions
|
||||
// Legalize MUBUF* instructions by converting to addr64 form.
|
||||
// FIXME: If we start using the non-addr64 instructions for compute, we
|
||||
// may need to legalize them here.
|
||||
// may need to legalize them as above. This especially applies to the
|
||||
// buffer_load_format_* variants and variants with idxen (or bothen).
|
||||
int SRsrcIdx =
|
||||
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
|
||||
if (SRsrcIdx != -1) {
|
||||
|
@ -2029,6 +2029,7 @@ def SI_RETURN : PseudoInstSI <
|
||||
let hasSideEffects = 1;
|
||||
let SALU = 1;
|
||||
let hasNoSchedulingInfo = 1;
|
||||
let DisableWQM = 1;
|
||||
}
|
||||
|
||||
let Uses = [EXEC], Defs = [EXEC, VCC, M0],
|
||||
|
@ -219,13 +219,6 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
|
||||
markInstruction(MI, Flags, Worklist);
|
||||
GlobalFlags |= Flags;
|
||||
}
|
||||
|
||||
if (WQMOutputs && MBB.succ_empty()) {
|
||||
// This is a prolog shader. Make sure we go back to exact mode at the end.
|
||||
Blocks[&MBB].OutNeeds = StateExact;
|
||||
Worklist.push_back(&MBB);
|
||||
GlobalFlags |= StateExact;
|
||||
}
|
||||
}
|
||||
|
||||
return GlobalFlags;
|
||||
|
@ -634,7 +634,7 @@ static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
|
||||
}
|
||||
|
||||
if (!isa<IntToPtrInst>(V) && !isa<PtrToIntInst>(V) &&
|
||||
!isa<GEPOperator>(V) && !isa<PHINode>(V))
|
||||
!isa<GetElementPtrInst>(V) && !isa<PHINode>(V))
|
||||
// We've found some value that we can't explore which is different from
|
||||
// the base. Therefore we can't do this transformation.
|
||||
return false;
|
||||
|
@ -579,6 +579,13 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
|
||||
UndefValue::get(T), NewLoad, 0, Name));
|
||||
}
|
||||
|
||||
// Bail out if the array is too large. Ideally we would like to optimize
|
||||
// arrays of arbitrary size but this has a terrible impact on compile time.
|
||||
// The threshold here is chosen arbitrarily and may need a little bit of
|
||||
// tuning.
|
||||
if (NumElements > 1024)
|
||||
return nullptr;
|
||||
|
||||
const DataLayout &DL = IC.getDataLayout();
|
||||
auto EltSize = DL.getTypeAllocSize(ET);
|
||||
auto Align = LI.getAlignment();
|
||||
@ -1081,6 +1088,13 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Bail out if the array is too large. Ideally we would like to optimize
|
||||
// arrays of arbitrary size but this has a terrible impact on compile time.
|
||||
// The threshold here is chosen arbitrarily and may need a little bit of
|
||||
// tuning.
|
||||
if (NumElements > 1024)
|
||||
return false;
|
||||
|
||||
const DataLayout &DL = IC.getDataLayout();
|
||||
auto EltSize = DL.getTypeAllocSize(AT->getElementType());
|
||||
auto Align = SI.getAlignment();
|
||||
|
@ -2024,14 +2024,20 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
|
||||
|
||||
// Move all 'aggressive' instructions, which are defined in the
|
||||
// conditional parts of the if's up to the dominating block.
|
||||
if (IfBlock1)
|
||||
if (IfBlock1) {
|
||||
for (auto &I : *IfBlock1)
|
||||
I.dropUnknownNonDebugMetadata();
|
||||
DomBlock->getInstList().splice(InsertPt->getIterator(),
|
||||
IfBlock1->getInstList(), IfBlock1->begin(),
|
||||
IfBlock1->getTerminator()->getIterator());
|
||||
if (IfBlock2)
|
||||
}
|
||||
if (IfBlock2) {
|
||||
for (auto &I : *IfBlock2)
|
||||
I.dropUnknownNonDebugMetadata();
|
||||
DomBlock->getInstList().splice(InsertPt->getIterator(),
|
||||
IfBlock2->getInstList(), IfBlock2->begin(),
|
||||
IfBlock2->getTerminator()->getIterator());
|
||||
}
|
||||
|
||||
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
|
||||
// Change the PHI node into a select instruction.
|
||||
|
49
test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll
Normal file
49
test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll
Normal file
@ -0,0 +1,49 @@
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
|
||||
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
|
||||
|
||||
; Test that buffer_load_format with VGPR resource descriptor is properly
|
||||
; legalized.
|
||||
|
||||
; CHECK-LABEL: {{^}}test_none:
|
||||
; CHECK: buffer_load_format_x v0, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
define amdgpu_vs float @test_none(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
|
||||
main_body:
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
|
||||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
|
||||
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 0, i32 0, i1 0, i1 0)
|
||||
ret float %tmp7
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_idxen:
|
||||
; CHECK: buffer_load_format_x v0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen{{$}}
|
||||
define amdgpu_vs float @test_idxen(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
|
||||
main_body:
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
|
||||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
|
||||
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 0, i1 0, i1 0)
|
||||
ret float %tmp7
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_offen:
|
||||
; CHECK: buffer_load_format_x v0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
|
||||
define amdgpu_vs float @test_offen(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
|
||||
main_body:
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
|
||||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
|
||||
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 0, i32 undef, i1 0, i1 0)
|
||||
ret float %tmp7
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_both:
|
||||
; CHECK: buffer_load_format_x v0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen offen{{$}}
|
||||
define amdgpu_vs float @test_both(<4 x i32> addrspace(2)* inreg %base, i32 %i) {
|
||||
main_body:
|
||||
%ptr = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %base, i32 %i
|
||||
%tmp2 = load <4 x i32>, <4 x i32> addrspace(2)* %ptr, align 32
|
||||
%tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 undef, i1 0, i1 0)
|
||||
ret float %tmp7
|
||||
}
|
||||
|
||||
declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) nounwind readonly
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
@ -17,17 +17,18 @@ main_body:
|
||||
;CHECK-LABEL: {{^}}test2:
|
||||
;CHECK-NEXT: ; %main_body
|
||||
;CHECK-NEXT: s_wqm_b64 exec, exec
|
||||
;CHECK: image_sample
|
||||
;CHECK-NOT: exec
|
||||
;CHECK: _load_dword v0,
|
||||
define amdgpu_ps float @test2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) {
|
||||
define amdgpu_ps void @test2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) {
|
||||
main_body:
|
||||
%c.1 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
|
||||
%c.2 = bitcast <4 x float> %c.1 to <4 x i32>
|
||||
%c.3 = extractelement <4 x i32> %c.2, i32 0
|
||||
%gep = getelementptr float, float addrspace(1)* %ptr, i32 %c.3
|
||||
%data = load float, float addrspace(1)* %gep
|
||||
ret float %data
|
||||
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %data, float undef, float undef, float undef)
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; ... but disabled for stores (and, in this simple case, not re-enabled).
|
||||
@ -414,6 +415,46 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Must return to exact at the end of a non-void returning shader,
|
||||
; otherwise the EXEC mask exported by the epilog will be wrong. This is true
|
||||
; even if the shader has no kills, because a kill could have happened in a
|
||||
; previous shader fragment.
|
||||
;
|
||||
; CHECK-LABEL: {{^}}test_nonvoid_return:
|
||||
; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
|
||||
; CHECK: s_wqm_b64 exec, exec
|
||||
;
|
||||
; CHECK: s_and_b64 exec, exec, [[LIVE]]
|
||||
; CHECK-NOT: exec
|
||||
define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind {
|
||||
%tex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
|
||||
%tex.i = bitcast <4 x float> %tex to <4 x i32>
|
||||
%dtex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tex.i, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
|
||||
ret <4 x float> %dtex
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_nonvoid_return_unreachable:
|
||||
; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
|
||||
; CHECK: s_wqm_b64 exec, exec
|
||||
;
|
||||
; CHECK: s_and_b64 exec, exec, [[LIVE]]
|
||||
; CHECK-NOT: exec
|
||||
define amdgpu_ps <4 x float> @test_nonvoid_return_unreachable(i32 inreg %c) nounwind {
|
||||
entry:
|
||||
%tex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
|
||||
%tex.i = bitcast <4 x float> %tex to <4 x i32>
|
||||
%dtex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tex.i, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
|
||||
|
||||
%cc = icmp sgt i32 %c, 0
|
||||
br i1 %cc, label %if, label %else
|
||||
|
||||
if:
|
||||
store volatile <4 x float> %dtex, <4 x float>* undef
|
||||
unreachable
|
||||
|
||||
else:
|
||||
ret <4 x float> %dtex
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
|
||||
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
|
||||
|
5938
test/CodeGen/X86/mul-i1024.ll
Normal file
5938
test/CodeGen/X86/mul-i1024.ll
Normal file
File diff suppressed because it is too large
Load Diff
@ -15,12 +15,17 @@ entry:
|
||||
; There is a lot of inter-register motion, and so matching the instruction
|
||||
; sequence will be fragile. There should be 6 underlying multiplications.
|
||||
; CHECK: imulq
|
||||
; CHECK: mulq
|
||||
; CHECK: imulq
|
||||
; CHECK: imulq
|
||||
; CHECK: mulq
|
||||
; CHECK: imulq
|
||||
; CHECK: imulq
|
||||
; CHECK: imulq
|
||||
; CHECK: mulq
|
||||
; CHECK: mulq
|
||||
; CHECK: mulq
|
||||
; CHECK: mulq
|
||||
; CHECK-NOT: imulq
|
||||
; CHECK-NOT: mulq
|
||||
; CHECK: retq
|
||||
|
||||
attributes #0 = { norecurse nounwind uwtable "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" }
|
||||
|
1238
test/CodeGen/X86/mul-i512.ll
Normal file
1238
test/CodeGen/X86/mul-i512.ll
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
; RUN: llvm-as -o %t.dst.bc %s
|
||||
; RUN: llvm-as -o %t.src.bc %S/Inputs/type-mapping-src.ll
|
||||
; RUN: llvm-lto %t.dst.bc %t.src.bc -o=/dev/null
|
||||
; RUN: llvm-lto %t.dst.bc %t.src.bc -o=%t.lto.bc
|
||||
|
||||
target triple = "x86_64-pc-windows-msvc18.0.0"
|
||||
|
||||
|
@ -167,4 +167,24 @@ lpad:
|
||||
; CHECK: ret i32* %[[PTR]]
|
||||
}
|
||||
|
||||
|
||||
@pr30402 = constant i64 3
|
||||
define i1 @test7() {
|
||||
entry:
|
||||
br label %bb7
|
||||
|
||||
bb7: ; preds = %bb7, %entry
|
||||
%phi = phi i64* [ @pr30402, %entry ], [ getelementptr inbounds (i64, i64* @pr30402, i32 1), %bb7 ]
|
||||
%cmp = icmp eq i64* %phi, getelementptr inbounds (i64, i64* @pr30402, i32 1)
|
||||
br i1 %cmp, label %bb10, label %bb7
|
||||
|
||||
bb10:
|
||||
ret i1 %cmp
|
||||
}
|
||||
; CHECK-LABEL: @test7(
|
||||
; CHECK: %[[phi:.*]] = phi i64* [ @pr30402, %entry ], [ getelementptr inbounds (i64, i64* @pr30402, i32 1), %bb7 ]
|
||||
; CHECK: %[[cmp:.*]] = icmp eq i64* %[[phi]], getelementptr inbounds (i64, i64* @pr30402, i32 1)
|
||||
; CHECK: ret i1 %[[cmp]]
|
||||
|
||||
|
||||
declare i32 @__gxx_personality_v0(...)
|
||||
|
@ -49,6 +49,15 @@ define void @storeArrayOfA([1 x %A]* %aa.ptr) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @storeLargeArrayOfA([2000 x %A]* %aa.ptr) {
|
||||
; CHECK-LABEL: storeLargeArrayOfA
|
||||
; CHECK-NEXT: store [2000 x %A]
|
||||
; CHECK-NEXT: ret void
|
||||
%i1 = insertvalue [2000 x %A] undef, %A { %A__vtbl* @A__vtblZ }, 1
|
||||
store [2000 x %A] %i1, [2000 x %A]* %aa.ptr, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @storeStructOfArrayOfA({ [1 x %A] }* %saa.ptr) {
|
||||
; CHECK-LABEL: storeStructOfArrayOfA
|
||||
; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { [1 x %A] }, { [1 x %A] }* %saa.ptr, i64 0, i32 0, i64 0, i32 0
|
||||
@ -179,6 +188,14 @@ define [2 x %B] @loadArrayOfB([2 x %B]* %ab.ptr) {
|
||||
ret [2 x %B] %1
|
||||
}
|
||||
|
||||
define [2000 x %B] @loadLargeArrayOfB([2000 x %B]* %ab.ptr) {
|
||||
; CHECK-LABEL: loadLargeArrayOfB
|
||||
; CHECK-NEXT: load [2000 x %B], [2000 x %B]* %ab.ptr, align 8
|
||||
; CHECK-NEXT: ret [2000 x %B]
|
||||
%1 = load [2000 x %B], [2000 x %B]* %ab.ptr, align 8
|
||||
ret [2000 x %B] %1
|
||||
}
|
||||
|
||||
%struct.S = type <{ i8, %struct.T }>
|
||||
%struct.T = type { i32, i32 }
|
||||
|
||||
|
31
test/Transforms/SimplifyCFG/PR29163.ll
Normal file
31
test/Transforms/SimplifyCFG/PR29163.ll
Normal file
@ -0,0 +1,31 @@
|
||||
; RUN: opt -S -simplifycfg < %s | FileCheck %s
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
@GV = external constant i64*
|
||||
|
||||
define i64* @test1(i1 %cond, i8* %P) {
|
||||
entry:
|
||||
br i1 %cond, label %if, label %then
|
||||
|
||||
then:
|
||||
%bc = bitcast i8* %P to i64*
|
||||
br label %join
|
||||
|
||||
if:
|
||||
%load = load i64*, i64** @GV, align 8, !dereferenceable !0
|
||||
br label %join
|
||||
|
||||
join:
|
||||
%phi = phi i64* [ %bc, %then ], [ %load, %if ]
|
||||
ret i64* %phi
|
||||
}
|
||||
|
||||
; CHECK-LABEL: define i64* @test1(
|
||||
; CHECK: %[[bc:.*]] = bitcast i8* %P to i64*
|
||||
; CHECK: %[[load:.*]] = load i64*, i64** @GV, align 8{{$}}
|
||||
; CHECK: %[[phi:.*]] = select i1 %cond, i64* %[[load]], i64* %[[bc]]
|
||||
; CHECK: ret i64* %[[phi]]
|
||||
|
||||
|
||||
!0 = !{i64 8}
|
Loading…
Reference in New Issue
Block a user