Vendor import of llvm 4.0.0 release r297347:
https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_400/final@297347
This commit is contained in:
parent
365919ebc1
commit
31bbf64f3a
@ -30,7 +30,8 @@ from now, will be version 5.0.0.
|
||||
|
||||
Non-comprehensive list of changes in this release
|
||||
=================================================
|
||||
* Minimum compiler version to build has been raised to GCC 4.8 and VS 2015.
|
||||
* The minimum compiler version required for building LLVM has been raised to
|
||||
4.8 for GCC and 2015 for Visual Studio.
|
||||
|
||||
* The C API functions ``LLVMAddFunctionAttr``, ``LLVMGetFunctionAttr``,
|
||||
``LLVMRemoveFunctionAttr``, ``LLVMAddAttribute``, ``LLVMRemoveAttribute``,
|
||||
@ -56,15 +57,8 @@ Non-comprehensive list of changes in this release
|
||||
with LLVM option ``-adce-remove-loops`` when the loop body otherwise has
|
||||
no live operations.
|
||||
|
||||
* The GVNHoist pass is now enabled by default. The new pass based on Global
|
||||
Value Numbering detects similar computations in branch code and replaces
|
||||
multiple instances of the same computation with a unique expression. The
|
||||
transform benefits code size and generates better schedules. GVNHoist is
|
||||
more aggressive at ``-Os`` and ``-Oz``, hoisting more expressions at the
|
||||
expense of execution time degradations.
|
||||
|
||||
* The llvm-cov tool can now export coverage data as JSON. Its HTML output mode
|
||||
has also improved.
|
||||
* The llvm-cov tool can now export coverage data as JSON. Its HTML output mode
|
||||
has also improved.
|
||||
|
||||
Improvements to ThinLTO (-flto=thin)
|
||||
------------------------------------
|
||||
@ -225,6 +219,10 @@ Changes to the ARM Targets
|
||||
A lot of work has also been done in LLD for ARM, which now supports more
|
||||
relocations and TLS.
|
||||
|
||||
Note: From the next release (5.0), the "vulcan" target will be renamed to
|
||||
"thunderx2t99", including command line options, assembly directives, etc. This
|
||||
release (4.0) will be the last one to accept "vulcan" as its name.
|
||||
|
||||
Changes to the AVR Target
|
||||
-----------------------------
|
||||
|
||||
@ -274,6 +272,15 @@ Changes to the MIPS Target
|
||||
* Fixed several crashes involving FastISel.
|
||||
* Corrected the definitions for aui/daui/dahi/dati for MIPSR6.
|
||||
|
||||
Changes to the X86 Target
|
||||
-------------------------
|
||||
|
||||
**During this release the X86 target has:**
|
||||
|
||||
* Added support for AMD Ryzen (znver1) CPUs.
|
||||
* Gained support for using VEX encoding on AVX-512 CPUs to reduce code size when possible.
|
||||
* Improved AVX-512 codegen.
|
||||
|
||||
Changes to the OCaml bindings
|
||||
-----------------------------
|
||||
|
||||
@ -299,6 +306,34 @@ x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on ARM
|
||||
and PowerPC (32/64 bit). Ports to other architectures like AArch64 and MIPS64
|
||||
are underway.
|
||||
|
||||
Portable Computing Language (pocl)
|
||||
----------------------------------
|
||||
|
||||
In addition to producing an easily portable open source OpenCL
|
||||
implementation, another major goal of `pocl <http://pocl.sourceforge.net/>`_
|
||||
is improving performance portability of OpenCL programs with
|
||||
compiler optimizations, reducing the need for target-dependent manual
|
||||
optimizations. An important part of pocl is a set of LLVM passes used to
|
||||
statically parallelize multiple work-items with the kernel compiler, even in
|
||||
the presence of work-group barriers. This enables static parallelization of
|
||||
the fine-grained static concurrency in the work groups in multiple ways.
|
||||
|
||||
TTA-based Co-design Environment (TCE)
|
||||
-------------------------------------
|
||||
|
||||
`TCE <http://tce.cs.tut.fi/>`_ is a toolset for designing customized
|
||||
processors based on the Transport Triggered Architecture (TTA).
|
||||
The toolset provides a complete co-design flow from C/C++
|
||||
programs down to synthesizable VHDL/Verilog and parallel program binaries.
|
||||
Processor customization points include register files, function units,
|
||||
supported operations, and the interconnection network.
|
||||
|
||||
TCE uses Clang and LLVM for C/C++/OpenCL C language support, target independent
|
||||
optimizations and also for parts of code generation. It generates new
|
||||
LLVM-based code generators "on the fly" for the designed TTA processors and
|
||||
loads them into the compiler backend as runtime libraries to avoid
|
||||
per-target recompilation of larger parts of the compiler chain.
|
||||
|
||||
|
||||
Additional Information
|
||||
======================
|
||||
|
@ -127,10 +127,15 @@ static cl::opt<unsigned> MulOpsInlineThreshold(
|
||||
cl::desc("Threshold for inlining multiplication operands into a SCEV"),
|
||||
cl::init(1000));
|
||||
|
||||
static cl::opt<unsigned>
|
||||
MaxCompareDepth("scalar-evolution-max-compare-depth", cl::Hidden,
|
||||
cl::desc("Maximum depth of recursive compare complexity"),
|
||||
cl::init(32));
|
||||
static cl::opt<unsigned> MaxSCEVCompareDepth(
|
||||
"scalar-evolution-max-scev-compare-depth", cl::Hidden,
|
||||
cl::desc("Maximum depth of recursive SCEV complexity comparisons"),
|
||||
cl::init(32));
|
||||
|
||||
static cl::opt<unsigned> MaxValueCompareDepth(
|
||||
"scalar-evolution-max-value-compare-depth", cl::Hidden,
|
||||
cl::desc("Maximum depth of recursive value complexity comparisons"),
|
||||
cl::init(2));
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SCEV class definitions
|
||||
@ -481,7 +486,7 @@ static int
|
||||
CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache,
|
||||
const LoopInfo *const LI, Value *LV, Value *RV,
|
||||
unsigned Depth) {
|
||||
if (Depth > MaxCompareDepth || EqCache.count({LV, RV}))
|
||||
if (Depth > MaxValueCompareDepth || EqCache.count({LV, RV}))
|
||||
return 0;
|
||||
|
||||
// Order pointer values after integer values. This helps SCEVExpander form
|
||||
@ -568,7 +573,7 @@ static int CompareSCEVComplexity(
|
||||
if (LType != RType)
|
||||
return (int)LType - (int)RType;
|
||||
|
||||
if (Depth > MaxCompareDepth || EqCacheSCEV.count({LHS, RHS}))
|
||||
if (Depth > MaxSCEVCompareDepth || EqCacheSCEV.count({LHS, RHS}))
|
||||
return 0;
|
||||
// Aside from the getSCEVType() ordering, the particular ordering
|
||||
// isn't very important except that it's beneficial to be consistent,
|
||||
|
@ -141,8 +141,8 @@ static cl::opt<int> PreInlineThreshold(
|
||||
"(default = 75)"));
|
||||
|
||||
static cl::opt<bool> EnableGVNHoist(
|
||||
"enable-gvn-hoist", cl::init(true), cl::Hidden,
|
||||
cl::desc("Enable the GVN hoisting pass (default = on)"));
|
||||
"enable-gvn-hoist", cl::init(false), cl::Hidden,
|
||||
cl::desc("Enable the GVN hoisting pass"));
|
||||
|
||||
static cl::opt<bool>
|
||||
DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false),
|
||||
|
@ -200,13 +200,11 @@ static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) {
|
||||
class GVNHoist {
|
||||
public:
|
||||
GVNHoist(DominatorTree *DT, AliasAnalysis *AA, MemoryDependenceResults *MD,
|
||||
MemorySSA *MSSA, bool OptForMinSize)
|
||||
: DT(DT), AA(AA), MD(MD), MSSA(MSSA), OptForMinSize(OptForMinSize),
|
||||
HoistingGeps(OptForMinSize), HoistedCtr(0) {
|
||||
// Hoist as far as possible when optimizing for code-size.
|
||||
if (OptForMinSize)
|
||||
MaxNumberOfBBSInPath = -1;
|
||||
}
|
||||
MemorySSA *MSSA)
|
||||
: DT(DT), AA(AA), MD(MD), MSSA(MSSA),
|
||||
HoistingGeps(false),
|
||||
HoistedCtr(0)
|
||||
{ }
|
||||
|
||||
bool run(Function &F) {
|
||||
VN.setDomTree(DT);
|
||||
@ -251,7 +249,6 @@ private:
|
||||
AliasAnalysis *AA;
|
||||
MemoryDependenceResults *MD;
|
||||
MemorySSA *MSSA;
|
||||
const bool OptForMinSize;
|
||||
const bool HoistingGeps;
|
||||
DenseMap<const Value *, unsigned> DFSNumber;
|
||||
BBSideEffectsSet BBSideEffects;
|
||||
@ -505,11 +502,6 @@ private:
|
||||
bool safeToHoistScalar(const BasicBlock *HoistBB,
|
||||
SmallPtrSetImpl<const BasicBlock *> &WL,
|
||||
int &NBBsOnAllPaths) {
|
||||
// Enable scalar hoisting at -Oz as it is safe to hoist scalars to a place
|
||||
// where they are partially needed.
|
||||
if (OptForMinSize)
|
||||
return true;
|
||||
|
||||
// Check that the hoisted expression is needed on all paths.
|
||||
if (!hoistingFromAllPaths(HoistBB, WL))
|
||||
return false;
|
||||
@ -923,13 +915,8 @@ private:
|
||||
Intr->getIntrinsicID() == Intrinsic::assume)
|
||||
continue;
|
||||
}
|
||||
if (Call->mayHaveSideEffects()) {
|
||||
if (!OptForMinSize)
|
||||
break;
|
||||
// We may continue hoisting across calls which write to memory.
|
||||
if (Call->mayThrow())
|
||||
break;
|
||||
}
|
||||
if (Call->mayHaveSideEffects())
|
||||
break;
|
||||
|
||||
if (Call->isConvergent())
|
||||
break;
|
||||
@ -971,7 +958,7 @@ public:
|
||||
auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
|
||||
auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
|
||||
|
||||
GVNHoist G(&DT, &AA, &MD, &MSSA, F.optForMinSize());
|
||||
GVNHoist G(&DT, &AA, &MD, &MSSA);
|
||||
return G.run(F);
|
||||
}
|
||||
|
||||
@ -991,7 +978,7 @@ PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) {
|
||||
AliasAnalysis &AA = AM.getResult<AAManager>(F);
|
||||
MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
|
||||
MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
|
||||
GVNHoist G(&DT, &AA, &MD, &MSSA, F.optForMinSize());
|
||||
GVNHoist G(&DT, &AA, &MD, &MSSA);
|
||||
if (!G.run(F))
|
||||
return PreservedAnalyses::all();
|
||||
|
||||
|
BIN
test/DebugInfo/Inputs/split-dwarf-empty.o
Normal file
BIN
test/DebugInfo/Inputs/split-dwarf-empty.o
Normal file
Binary file not shown.
@ -41,7 +41,6 @@ attributes #0 = { optnone noinline }
|
||||
; OPT-O1-DAG: Skipping pass 'Combine redundant instructions'
|
||||
; OPT-O1-DAG: Skipping pass 'Dead Store Elimination'
|
||||
; OPT-O1-DAG: Skipping pass 'Early CSE'
|
||||
; OPT-O1-DAG: Skipping pass 'Early GVN Hoisting of Expressions'
|
||||
; OPT-O1-DAG: Skipping pass 'Jump Threading'
|
||||
; OPT-O1-DAG: Skipping pass 'MemCpy Optimization'
|
||||
; OPT-O1-DAG: Skipping pass 'Reassociate expressions'
|
||||
|
BIN
test/Object/Inputs/dynamic-reloc.so
Executable file
BIN
test/Object/Inputs/dynamic-reloc.so
Executable file
Binary file not shown.
BIN
test/Object/Inputs/macho-bad-archive1.a
Normal file
BIN
test/Object/Inputs/macho-bad-archive1.a
Normal file
Binary file not shown.
BIN
test/Object/Inputs/macho-bad-archive2.a
Normal file
BIN
test/Object/Inputs/macho-bad-archive2.a
Normal file
Binary file not shown.
BIN
test/Object/Inputs/macho-toc64-archive-x86_64.a
Normal file
BIN
test/Object/Inputs/macho-toc64-archive-x86_64.a
Normal file
Binary file not shown.
81
test/Transforms/GVNHoist/hoist-unsafe-pr31729.ll
Normal file
81
test/Transforms/GVNHoist/hoist-unsafe-pr31729.ll
Normal file
@ -0,0 +1,81 @@
|
||||
; RUN: opt -gvn-hoist -S < %s | FileCheck %s
|
||||
|
||||
; Check that urem is not hoisted.
|
||||
; CHECK-LABEL: @main
|
||||
; CHECK: urem
|
||||
; CHECK: urem
|
||||
; CHECK: urem
|
||||
|
||||
@g_x_s = global i32 -470211272, align 4
|
||||
@g_z_s = global i32 2007237709, align 4
|
||||
@g_x_u = global i32 282475249, align 4
|
||||
@g_z_u = global i32 984943658, align 4
|
||||
@g_m = global i32 16807, align 4
|
||||
@res = common global i32 0, align 4
|
||||
|
||||
; Function Attrs:
|
||||
define i64 @func() #0 {
|
||||
entry:
|
||||
ret i64 1
|
||||
}
|
||||
|
||||
; Function Attrs:
|
||||
define i32 @main() {
|
||||
entry:
|
||||
%0 = load volatile i32, i32* @g_x_s, align 4
|
||||
%1 = load volatile i32, i32* @g_z_s, align 4
|
||||
%2 = load volatile i32, i32* @g_x_u, align 4
|
||||
%3 = load volatile i32, i32* @g_z_u, align 4
|
||||
%4 = load volatile i32, i32* @g_m, align 4
|
||||
%call = call i64 @func() #4
|
||||
%conv = sext i32 %1 to i64
|
||||
%cmp = icmp ne i64 %call, %conv
|
||||
br i1 %cmp, label %if.end, label %lor.lhs.false
|
||||
|
||||
lor.lhs.false:
|
||||
%div = udiv i32 %4, %1
|
||||
%rem = urem i32 %0, %div
|
||||
%cmp2 = icmp eq i32 %rem, 0
|
||||
br i1 %cmp2, label %if.end, label %if.then
|
||||
|
||||
if.then:
|
||||
br label %cleanup
|
||||
|
||||
if.end:
|
||||
%call4 = call i64 @func() #4
|
||||
%conv5 = zext i32 %3 to i64
|
||||
%cmp6 = icmp ne i64 %call4, %conv5
|
||||
br i1 %cmp6, label %if.end14, label %lor.lhs.false8
|
||||
|
||||
lor.lhs.false8:
|
||||
%div9 = udiv i32 %4, %3
|
||||
%rem10 = urem i32 %0, %div9
|
||||
%cmp11 = icmp eq i32 %rem10, 0
|
||||
br i1 %cmp11, label %if.end14, label %if.then13
|
||||
|
||||
if.then13:
|
||||
br label %cleanup
|
||||
|
||||
if.end14:
|
||||
%call15 = call i64 @func() #4
|
||||
%cmp17 = icmp ne i64 %call15, %conv
|
||||
br i1 %cmp17, label %if.end25, label %lor.lhs.false19
|
||||
|
||||
lor.lhs.false19:
|
||||
%div20 = udiv i32 %4, %1
|
||||
%rem21 = urem i32 %0, %div20
|
||||
%cmp22 = icmp eq i32 %rem21, 0
|
||||
br i1 %cmp22, label %if.end25, label %if.then24
|
||||
|
||||
if.then24:
|
||||
br label %cleanup
|
||||
|
||||
if.end25:
|
||||
br label %cleanup
|
||||
|
||||
cleanup:
|
||||
%retval.0 = phi i32 [ 0, %if.end25 ], [ 1, %if.then24 ], [ 1, %if.then13 ], [ 1, %if.then ]
|
||||
ret i32 %retval.0
|
||||
}
|
||||
|
||||
attributes #0 = { minsize noinline nounwind optsize uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
BIN
test/tools/dsymutil/Inputs/common.macho.x86_64.o
Normal file
BIN
test/tools/dsymutil/Inputs/common.macho.x86_64.o
Normal file
Binary file not shown.
BIN
test/tools/dsymutil/Inputs/thumb.o
Normal file
BIN
test/tools/dsymutil/Inputs/thumb.o
Normal file
Binary file not shown.
BIN
test/tools/llvm-objdump/Inputs/eh_frame_zero_cie.o
Normal file
BIN
test/tools/llvm-objdump/Inputs/eh_frame_zero_cie.o
Normal file
Binary file not shown.
@ -465,7 +465,7 @@ TEST_F(ScalarEvolutionsTest, CommutativeExprOperandOrder) {
|
||||
});
|
||||
}
|
||||
|
||||
TEST_F(ScalarEvolutionsTest, SCEVCompareComplexity) {
|
||||
TEST_F(ScalarEvolutionsTest, CompareSCEVComplexity) {
|
||||
FunctionType *FTy =
|
||||
FunctionType::get(Type::getVoidTy(Context), std::vector<Type *>(), false);
|
||||
Function *F = cast<Function>(M.getOrInsertFunction("f", FTy));
|
||||
@ -532,5 +532,41 @@ TEST_F(ScalarEvolutionsTest, SCEVCompareComplexity) {
|
||||
EXPECT_NE(nullptr, SE.getSCEV(Acc[0]));
|
||||
}
|
||||
|
||||
TEST_F(ScalarEvolutionsTest, CompareValueComplexity) {
|
||||
IntegerType *IntPtrTy = M.getDataLayout().getIntPtrType(Context);
|
||||
PointerType *IntPtrPtrTy = IntPtrTy->getPointerTo();
|
||||
|
||||
FunctionType *FTy =
|
||||
FunctionType::get(Type::getVoidTy(Context), {IntPtrTy, IntPtrTy}, false);
|
||||
Function *F = cast<Function>(M.getOrInsertFunction("f", FTy));
|
||||
BasicBlock *EntryBB = BasicBlock::Create(Context, "entry", F);
|
||||
|
||||
Value *X = &*F->arg_begin();
|
||||
Value *Y = &*std::next(F->arg_begin());
|
||||
|
||||
const int ValueDepth = 10;
|
||||
for (int i = 0; i < ValueDepth; i++) {
|
||||
X = new LoadInst(new IntToPtrInst(X, IntPtrPtrTy, "", EntryBB), "",
|
||||
/*isVolatile*/ false, EntryBB);
|
||||
Y = new LoadInst(new IntToPtrInst(Y, IntPtrPtrTy, "", EntryBB), "",
|
||||
/*isVolatile*/ false, EntryBB);
|
||||
}
|
||||
|
||||
auto *MulA = BinaryOperator::CreateMul(X, Y, "", EntryBB);
|
||||
auto *MulB = BinaryOperator::CreateMul(Y, X, "", EntryBB);
|
||||
ReturnInst::Create(Context, nullptr, EntryBB);
|
||||
|
||||
// This test isn't checking for correctness. Today making A and B resolve to
|
||||
// the same SCEV would require deeper searching in CompareValueComplexity,
|
||||
// which will slow down compilation. However, this test can fail (with LLVM's
|
||||
// behavior still being correct) if we ever have a smarter
|
||||
// CompareValueComplexity that is both fast and more accurate.
|
||||
|
||||
ScalarEvolution SE = buildSE(*F);
|
||||
auto *A = SE.getSCEV(MulA);
|
||||
auto *B = SE.getSCEV(MulB);
|
||||
EXPECT_NE(A, B);
|
||||
}
|
||||
|
||||
} // end anonymous namespace
|
||||
} // end namespace llvm
|
||||
|
Loading…
x
Reference in New Issue
Block a user